1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 #if defined(PETSC_USE_COMPLEX) 22 PetscReal *rwork2; 23 #endif 24 25 PetscFunctionBegin; 26 PetscCall(MatGetSize(A,&nr,&nc)); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 PetscCall(PetscMalloc1(ulw,&uwork)); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 PetscCall(PetscMalloc1(n,&sing)); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 PetscCall(PetscMalloc1(nr*nr,&U)); 46 PetscCall(PetscBLASIntCast(nr,&bM)); 47 PetscCall(PetscBLASIntCast(nc,&bN)); 48 PetscCall(PetscBLASIntCast(ulw,&lwork)); 49 PetscCall(MatDenseGetArray(A,&data)); 50 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 #if !defined(PETSC_USE_COMPLEX) 52 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 53 #else 54 PetscCall(PetscMalloc1(5*n,&rwork2)); 55 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr)); 56 PetscCall(PetscFree(rwork2)); 57 #endif 58 PetscCall(PetscFPTrapPop()); 59 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 60 PetscCall(MatDenseRestoreArray(A,&data)); 61 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 62 if (!rwork) { 63 PetscCall(PetscFree(sing)); 64 } 65 if (!work) { 66 PetscCall(PetscFree(uwork)); 67 } 68 /* create B */ 69 if (!range) { 70 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B)); 71 PetscCall(MatDenseGetArray(*B,&data)); 72 PetscCall(PetscArraycpy(data,U+nr*i,(nr-i)*nr)); 73 } else { 74 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B)); 75 PetscCall(MatDenseGetArray(*B,&data)); 76 PetscCall(PetscArraycpy(data,U,i*nr)); 77 } 78 PetscCall(MatDenseRestoreArray(*B,&data)); 79 PetscCall(PetscFree(U)); 80 PetscFunctionReturn(0); 81 } 82 83 /* TODO REMOVE */ 84 #if defined(PRINT_GDET) 85 static int inc = 0; 86 static int lev = 0; 87 #endif 88 89 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 90 { 91 Mat GE,GEd; 92 PetscInt rsize,csize,esize; 93 PetscScalar *ptr; 94 95 PetscFunctionBegin; 96 PetscCall(ISGetSize(edge,&esize)); 97 if (!esize) PetscFunctionReturn(0); 98 PetscCall(ISGetSize(extrow,&rsize)); 99 PetscCall(ISGetSize(extcol,&csize)); 100 101 /* gradients */ 102 ptr = work + 5*esize; 103 PetscCall(MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE)); 104 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins)); 105 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins)); 106 PetscCall(MatDestroy(&GE)); 107 108 /* constants */ 109 ptr += rsize*csize; 110 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd)); 111 PetscCall(MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE)); 112 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd)); 113 PetscCall(MatDestroy(&GE)); 114 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins)); 115 PetscCall(MatDestroy(&GEd)); 116 117 if (corners) { 118 Mat GEc; 119 const PetscScalar *vals; 120 PetscScalar v; 121 122 PetscCall(MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc)); 123 PetscCall(MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd)); 124 PetscCall(MatDenseGetArrayRead(GEd,&vals)); 125 /* v = PetscAbsScalar(vals[0]) */; 126 v = 1.; 127 cvals[0] = vals[0]/v; 128 cvals[1] = vals[1]/v; 129 PetscCall(MatDenseRestoreArrayRead(GEd,&vals)); 130 PetscCall(MatScale(*GKins,1./v)); 131 #if defined(PRINT_GDET) 132 { 133 PetscViewer viewer; 134 char filename[256]; 135 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 136 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 137 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 138 PetscCall(PetscObjectSetName((PetscObject)GEc,"GEc")); 139 PetscCall(MatView(GEc,viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)(*GKins),"GK")); 141 PetscCall(MatView(*GKins,viewer)); 142 PetscCall(PetscObjectSetName((PetscObject)GEd,"Gproj")); 143 PetscCall(MatView(GEd,viewer)); 144 PetscCall(PetscViewerDestroy(&viewer)); 145 } 146 #endif 147 PetscCall(MatDestroy(&GEd)); 148 PetscCall(MatDestroy(&GEc)); 149 } 150 151 PetscFunctionReturn(0); 152 } 153 154 PetscErrorCode PCBDDCNedelecSupport(PC pc) 155 { 156 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 157 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 158 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 159 Vec tvec; 160 PetscSF sfv; 161 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 162 MPI_Comm comm; 163 IS lned,primals,allprimals,nedfieldlocal; 164 IS *eedges,*extrows,*extcols,*alleedges; 165 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 166 PetscScalar *vals,*work; 167 PetscReal *rwork; 168 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 169 PetscInt ne,nv,Lv,order,n,field; 170 PetscInt n_neigh,*neigh,*n_shared,**shared; 171 PetscInt i,j,extmem,cum,maxsize,nee; 172 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 173 PetscInt *sfvleaves,*sfvroots; 174 PetscInt *corners,*cedges; 175 PetscInt *ecount,**eneighs,*vcount,**vneighs; 176 PetscInt *emarks; 177 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 178 179 PetscFunctionBegin; 180 /* If the discrete gradient is defined for a subset of dofs and global is true, 181 it assumes G is given in global ordering for all the dofs. 182 Otherwise, the ordering is global for the Nedelec field */ 183 order = pcbddc->nedorder; 184 conforming = pcbddc->conforming; 185 field = pcbddc->nedfield; 186 global = pcbddc->nedglobal; 187 setprimal = PETSC_FALSE; 188 print = PETSC_FALSE; 189 singular = PETSC_FALSE; 190 191 /* Command line customization */ 192 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC"); 193 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL)); 194 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL)); 195 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL)); 196 /* print debug info TODO: to be removed */ 197 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL)); 198 PetscOptionsEnd(); 199 200 /* Return if there are no edges in the decomposition and the problem is not singular */ 201 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&al2g,NULL)); 202 PetscCall(ISLocalToGlobalMappingGetSize(al2g,&n)); 203 PetscCall(PetscObjectGetComm((PetscObject)pc,&comm)); 204 if (!singular) { 205 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 206 lrc[0] = PETSC_FALSE; 207 for (i=0;i<n;i++) { 208 if (PetscRealPart(vals[i]) > 2.) { 209 lrc[0] = PETSC_TRUE; 210 break; 211 } 212 } 213 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 214 PetscCall(MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm)); 215 if (!lrc[1]) PetscFunctionReturn(0); 216 } 217 218 /* Get Nedelec field */ 219 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal,comm,PETSC_ERR_USER,"Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT,field,pcbddc->n_ISForDofsLocal); 220 if (pcbddc->n_ISForDofsLocal && field >= 0) { 221 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 222 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 223 PetscCall(ISGetLocalSize(nedfieldlocal,&ne)); 224 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 225 ne = n; 226 nedfieldlocal = NULL; 227 global = PETSC_TRUE; 228 } else if (field == PETSC_DECIDE) { 229 PetscInt rst,ren,*idx; 230 231 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 232 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 233 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren)); 234 for (i=rst;i<ren;i++) { 235 PetscInt nc; 236 237 PetscCall(MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 238 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 239 PetscCall(MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 240 } 241 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 242 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 243 PetscCall(PetscMalloc1(n,&idx)); 244 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 245 PetscCall(ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal)); 246 } else { 247 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 248 } 249 250 /* Sanity checks */ 251 PetscCheck(order || conforming,comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 252 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix,comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 253 PetscCheck(!order || (ne%order == 0),PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT,ne,order); 254 255 /* Just set primal dofs and return */ 256 if (setprimal) { 257 IS enedfieldlocal; 258 PetscInt *eidxs; 259 260 PetscCall(PetscMalloc1(ne,&eidxs)); 261 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 262 if (nedfieldlocal) { 263 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 264 for (i=0,cum=0;i<ne;i++) { 265 if (PetscRealPart(vals[idxs[i]]) > 2.) { 266 eidxs[cum++] = idxs[i]; 267 } 268 } 269 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 270 } else { 271 for (i=0,cum=0;i<ne;i++) { 272 if (PetscRealPart(vals[i]) > 2.) { 273 eidxs[cum++] = i; 274 } 275 } 276 } 277 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 278 PetscCall(ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal)); 279 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal)); 280 PetscCall(PetscFree(eidxs)); 281 PetscCall(ISDestroy(&nedfieldlocal)); 282 PetscCall(ISDestroy(&enedfieldlocal)); 283 PetscFunctionReturn(0); 284 } 285 286 /* Compute some l2g maps */ 287 if (nedfieldlocal) { 288 IS is; 289 290 /* need to map from the local Nedelec field to local numbering */ 291 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g)); 292 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 293 PetscCall(ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is)); 294 PetscCall(ISLocalToGlobalMappingCreateIS(is,&al2g)); 295 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 296 if (global) { 297 PetscCall(PetscObjectReference((PetscObject)al2g)); 298 el2g = al2g; 299 } else { 300 IS gis; 301 302 PetscCall(ISRenumber(is,NULL,NULL,&gis)); 303 PetscCall(ISLocalToGlobalMappingCreateIS(gis,&el2g)); 304 PetscCall(ISDestroy(&gis)); 305 } 306 PetscCall(ISDestroy(&is)); 307 } else { 308 /* restore default */ 309 pcbddc->nedfield = -1; 310 /* one ref for the destruction of al2g, one for el2g */ 311 PetscCall(PetscObjectReference((PetscObject)al2g)); 312 PetscCall(PetscObjectReference((PetscObject)al2g)); 313 el2g = al2g; 314 fl2g = NULL; 315 } 316 317 /* Start communication to drop connections for interior edges (for cc analysis only) */ 318 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 319 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 320 if (nedfieldlocal) { 321 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 322 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 323 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 324 } else { 325 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 326 } 327 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 328 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 329 330 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 331 PetscCall(MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G)); 332 PetscCall(MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 333 if (global) { 334 PetscInt rst; 335 336 PetscCall(MatGetOwnershipRange(G,&rst,NULL)); 337 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 338 if (matis->sf_rootdata[i] < 2) { 339 matis->sf_rootdata[cum++] = i + rst; 340 } 341 } 342 PetscCall(MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE)); 343 PetscCall(MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL)); 344 } else { 345 PetscInt *tbz; 346 347 PetscCall(PetscMalloc1(ne,&tbz)); 348 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 349 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 350 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 351 for (i=0,cum=0;i<ne;i++) 352 if (matis->sf_leafdata[idxs[i]] == 1) 353 tbz[cum++] = i; 354 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 355 PetscCall(ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz)); 356 PetscCall(MatZeroRows(G,cum,tbz,0.,NULL,NULL)); 357 PetscCall(PetscFree(tbz)); 358 } 359 } else { /* we need the entire G to infer the nullspace */ 360 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 361 G = pcbddc->discretegradient; 362 } 363 364 /* Extract subdomain relevant rows of G */ 365 PetscCall(ISLocalToGlobalMappingGetIndices(el2g,&idxs)); 366 PetscCall(ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned)); 367 PetscCall(MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall)); 368 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g,&idxs)); 369 PetscCall(ISDestroy(&lned)); 370 PetscCall(MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis)); 371 PetscCall(MatDestroy(&lGall)); 372 PetscCall(MatISGetLocalMat(lGis,&lG)); 373 374 /* SF for nodal dofs communications */ 375 PetscCall(MatGetLocalSize(G,NULL,&Lv)); 376 PetscCall(MatISGetLocalToGlobalMapping(lGis,NULL,&vl2g)); 377 PetscCall(PetscObjectReference((PetscObject)vl2g)); 378 PetscCall(ISLocalToGlobalMappingGetSize(vl2g,&nv)); 379 PetscCall(PetscSFCreate(comm,&sfv)); 380 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g,&idxs)); 381 PetscCall(PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs)); 382 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs)); 383 i = singular ? 2 : 1; 384 PetscCall(PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots)); 385 386 /* Destroy temporary G created in MATIS format and modified G */ 387 PetscCall(PetscObjectReference((PetscObject)lG)); 388 PetscCall(MatDestroy(&lGis)); 389 PetscCall(MatDestroy(&G)); 390 391 if (print) { 392 PetscCall(PetscObjectSetName((PetscObject)lG,"initial_lG")); 393 PetscCall(MatView(lG,NULL)); 394 } 395 396 /* Save lG for values insertion in change of basis */ 397 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGinit)); 398 399 /* Analyze the edge-nodes connections (duplicate lG) */ 400 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGe)); 401 PetscCall(MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 402 PetscCall(PetscBTCreate(nv,&btv)); 403 PetscCall(PetscBTCreate(ne,&bte)); 404 PetscCall(PetscBTCreate(ne,&btb)); 405 PetscCall(PetscBTCreate(ne,&btbd)); 406 PetscCall(PetscBTCreate(nv,&btvcand)); 407 /* need to import the boundary specification to ensure the 408 proper detection of coarse edges' endpoints */ 409 if (pcbddc->DirichletBoundariesLocal) { 410 IS is; 411 412 if (fl2g) { 413 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is)); 414 } else { 415 is = pcbddc->DirichletBoundariesLocal; 416 } 417 PetscCall(ISGetLocalSize(is,&cum)); 418 PetscCall(ISGetIndices(is,&idxs)); 419 for (i=0;i<cum;i++) { 420 if (idxs[i] >= 0) { 421 PetscCall(PetscBTSet(btb,idxs[i])); 422 PetscCall(PetscBTSet(btbd,idxs[i])); 423 } 424 } 425 PetscCall(ISRestoreIndices(is,&idxs)); 426 if (fl2g) { 427 PetscCall(ISDestroy(&is)); 428 } 429 } 430 if (pcbddc->NeumannBoundariesLocal) { 431 IS is; 432 433 if (fl2g) { 434 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is)); 435 } else { 436 is = pcbddc->NeumannBoundariesLocal; 437 } 438 PetscCall(ISGetLocalSize(is,&cum)); 439 PetscCall(ISGetIndices(is,&idxs)); 440 for (i=0;i<cum;i++) { 441 if (idxs[i] >= 0) { 442 PetscCall(PetscBTSet(btb,idxs[i])); 443 } 444 } 445 PetscCall(ISRestoreIndices(is,&idxs)); 446 if (fl2g) { 447 PetscCall(ISDestroy(&is)); 448 } 449 } 450 451 /* Count neighs per dof */ 452 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs)); 453 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs)); 454 455 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 456 for proper detection of coarse edges' endpoints */ 457 PetscCall(PetscBTCreate(ne,&btee)); 458 for (i=0;i<ne;i++) { 459 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 460 PetscCall(PetscBTSet(btee,i)); 461 } 462 } 463 PetscCall(PetscMalloc1(ne,&marks)); 464 if (!conforming) { 465 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 466 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 467 } 468 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 469 PetscCall(MatSeqAIJGetArray(lGe,&vals)); 470 cum = 0; 471 for (i=0;i<ne;i++) { 472 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 473 if (!PetscBTLookup(btee,i)) { 474 marks[cum++] = i; 475 continue; 476 } 477 /* set badly connected edge dofs as primal */ 478 if (!conforming) { 479 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 480 marks[cum++] = i; 481 PetscCall(PetscBTSet(bte,i)); 482 for (j=ii[i];j<ii[i+1];j++) { 483 PetscCall(PetscBTSet(btv,jj[j])); 484 } 485 } else { 486 /* every edge dofs should be connected trough a certain number of nodal dofs 487 to other edge dofs belonging to coarse edges 488 - at most 2 endpoints 489 - order-1 interior nodal dofs 490 - no undefined nodal dofs (nconn < order) 491 */ 492 PetscInt ends = 0,ints = 0, undef = 0; 493 for (j=ii[i];j<ii[i+1];j++) { 494 PetscInt v = jj[j],k; 495 PetscInt nconn = iit[v+1]-iit[v]; 496 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 497 if (nconn > order) ends++; 498 else if (nconn == order) ints++; 499 else undef++; 500 } 501 if (undef || ends > 2 || ints != order -1) { 502 marks[cum++] = i; 503 PetscCall(PetscBTSet(bte,i)); 504 for (j=ii[i];j<ii[i+1];j++) { 505 PetscCall(PetscBTSet(btv,jj[j])); 506 } 507 } 508 } 509 } 510 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 511 if (!order && ii[i+1] != ii[i]) { 512 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 513 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 514 } 515 } 516 PetscCall(PetscBTDestroy(&btee)); 517 PetscCall(MatSeqAIJRestoreArray(lGe,&vals)); 518 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 519 if (!conforming) { 520 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 521 PetscCall(MatDestroy(&lGt)); 522 } 523 PetscCall(MatZeroRows(lGe,cum,marks,0.,NULL,NULL)); 524 525 /* identify splitpoints and corner candidates */ 526 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 527 if (print) { 528 PetscCall(PetscObjectSetName((PetscObject)lGe,"edgerestr_lG")); 529 PetscCall(MatView(lGe,NULL)); 530 PetscCall(PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt")); 531 PetscCall(MatView(lGt,NULL)); 532 } 533 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 534 PetscCall(MatSeqAIJGetArray(lGt,&vals)); 535 for (i=0;i<nv;i++) { 536 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 537 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 538 if (!order) { /* variable order */ 539 PetscReal vorder = 0.; 540 541 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 542 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 543 PetscCheck(vorder-test <= PETSC_SQRT_MACHINE_EPSILON,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%" PetscInt_FMT ")",(double)vorder,test); 544 ord = 1; 545 } 546 PetscAssert(test%ord == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT,test,i,ord); 547 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 548 if (PetscBTLookup(btbd,jj[j])) { 549 bdir = PETSC_TRUE; 550 break; 551 } 552 if (vc != ecount[jj[j]]) { 553 sneighs = PETSC_FALSE; 554 } else { 555 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 556 for (k=0;k<vc;k++) { 557 if (vn[k] != en[k]) { 558 sneighs = PETSC_FALSE; 559 break; 560 } 561 } 562 } 563 } 564 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 565 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n",i,PetscBools[!sneighs],PetscBools[test >= 3*ord],PetscBools[bdir]); 566 PetscCall(PetscBTSet(btv,i)); 567 } else if (test == ord) { 568 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 569 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %" PetscInt_FMT "\n",i); 570 PetscCall(PetscBTSet(btv,i)); 571 } else { 572 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %" PetscInt_FMT "\n",i); 573 PetscCall(PetscBTSet(btvcand,i)); 574 } 575 } 576 } 577 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs)); 578 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs)); 579 PetscCall(PetscBTDestroy(&btbd)); 580 581 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 582 if (order != 1) { 583 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 584 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 585 for (i=0;i<nv;i++) { 586 if (PetscBTLookup(btvcand,i)) { 587 PetscBool found = PETSC_FALSE; 588 for (j=ii[i];j<ii[i+1] && !found;j++) { 589 PetscInt k,e = jj[j]; 590 if (PetscBTLookup(bte,e)) continue; 591 for (k=iit[e];k<iit[e+1];k++) { 592 PetscInt v = jjt[k]; 593 if (v != i && PetscBTLookup(btvcand,v)) { 594 found = PETSC_TRUE; 595 break; 596 } 597 } 598 } 599 if (!found) { 600 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %" PetscInt_FMT " CLEARED\n",i); 601 PetscCall(PetscBTClear(btvcand,i)); 602 } else { 603 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %" PetscInt_FMT " ACCEPTED\n",i); 604 } 605 } 606 } 607 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 608 } 609 PetscCall(MatSeqAIJRestoreArray(lGt,&vals)); 610 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 611 PetscCall(MatDestroy(&lGe)); 612 613 /* Get the local G^T explicitly */ 614 PetscCall(MatDestroy(&lGt)); 615 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 616 PetscCall(MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 617 618 /* Mark interior nodal dofs */ 619 PetscCall(ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 620 PetscCall(PetscBTCreate(nv,&btvi)); 621 for (i=1;i<n_neigh;i++) { 622 for (j=0;j<n_shared[i];j++) { 623 PetscCall(PetscBTSet(btvi,shared[i][j])); 624 } 625 } 626 PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 627 628 /* communicate corners and splitpoints */ 629 PetscCall(PetscMalloc1(nv,&vmarks)); 630 PetscCall(PetscArrayzero(sfvleaves,nv)); 631 PetscCall(PetscArrayzero(sfvroots,Lv)); 632 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 633 634 if (print) { 635 IS tbz; 636 637 cum = 0; 638 for (i=0;i<nv;i++) 639 if (sfvleaves[i]) 640 vmarks[cum++] = i; 641 642 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 643 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local")); 644 PetscCall(ISView(tbz,NULL)); 645 PetscCall(ISDestroy(&tbz)); 646 } 647 648 PetscCall(PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 649 PetscCall(PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 650 PetscCall(PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 651 PetscCall(PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 652 653 /* Zero rows of lGt corresponding to identified corners 654 and interior nodal dofs */ 655 cum = 0; 656 for (i=0;i<nv;i++) { 657 if (sfvleaves[i]) { 658 vmarks[cum++] = i; 659 PetscCall(PetscBTSet(btv,i)); 660 } 661 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 662 } 663 PetscCall(PetscBTDestroy(&btvi)); 664 if (print) { 665 IS tbz; 666 667 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 668 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior")); 669 PetscCall(ISView(tbz,NULL)); 670 PetscCall(ISDestroy(&tbz)); 671 } 672 PetscCall(MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL)); 673 PetscCall(PetscFree(vmarks)); 674 PetscCall(PetscSFDestroy(&sfv)); 675 PetscCall(PetscFree2(sfvleaves,sfvroots)); 676 677 /* Recompute G */ 678 PetscCall(MatDestroy(&lG)); 679 PetscCall(MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG)); 680 if (print) { 681 PetscCall(PetscObjectSetName((PetscObject)lG,"used_lG")); 682 PetscCall(MatView(lG,NULL)); 683 PetscCall(PetscObjectSetName((PetscObject)lGt,"used_lGt")); 684 PetscCall(MatView(lGt,NULL)); 685 } 686 687 /* Get primal dofs (if any) */ 688 cum = 0; 689 for (i=0;i<ne;i++) { 690 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 691 } 692 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,marks,marks)); 693 PetscCall(ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals)); 694 if (print) { 695 PetscCall(PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs")); 696 PetscCall(ISView(primals,NULL)); 697 } 698 PetscCall(PetscBTDestroy(&bte)); 699 /* TODO: what if the user passed in some of them ? */ 700 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 701 PetscCall(ISDestroy(&primals)); 702 703 /* Compute edge connectivity */ 704 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_")); 705 706 /* Symbolic conn = lG*lGt */ 707 PetscCall(MatProductCreate(lG,lGt,NULL,&conn)); 708 PetscCall(MatProductSetType(conn,MATPRODUCT_AB)); 709 PetscCall(MatProductSetAlgorithm(conn,"default")); 710 PetscCall(MatProductSetFill(conn,PETSC_DEFAULT)); 711 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_")); 712 PetscCall(MatProductSetFromOptions(conn)); 713 PetscCall(MatProductSymbolic(conn)); 714 715 PetscCall(MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 716 if (fl2g) { 717 PetscBT btf; 718 PetscInt *iia,*jja,*iiu,*jju; 719 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 720 721 /* create CSR for all local dofs */ 722 PetscCall(PetscMalloc1(n+1,&iia)); 723 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 724 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT,pcbddc->mat_graph->nvtxs_csr,n); 725 iiu = pcbddc->mat_graph->xadj; 726 jju = pcbddc->mat_graph->adjncy; 727 } else if (pcbddc->use_local_adj) { 728 rest = PETSC_TRUE; 729 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 730 } else { 731 free = PETSC_TRUE; 732 PetscCall(PetscMalloc2(n+1,&iiu,n,&jju)); 733 iiu[0] = 0; 734 for (i=0;i<n;i++) { 735 iiu[i+1] = i+1; 736 jju[i] = -1; 737 } 738 } 739 740 /* import sizes of CSR */ 741 iia[0] = 0; 742 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 743 744 /* overwrite entries corresponding to the Nedelec field */ 745 PetscCall(PetscBTCreate(n,&btf)); 746 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 747 for (i=0;i<ne;i++) { 748 PetscCall(PetscBTSet(btf,idxs[i])); 749 iia[idxs[i]+1] = ii[i+1]-ii[i]; 750 } 751 752 /* iia in CSR */ 753 for (i=0;i<n;i++) iia[i+1] += iia[i]; 754 755 /* jja in CSR */ 756 PetscCall(PetscMalloc1(iia[n],&jja)); 757 for (i=0;i<n;i++) 758 if (!PetscBTLookup(btf,i)) 759 for (j=0;j<iiu[i+1]-iiu[i];j++) 760 jja[iia[i]+j] = jju[iiu[i]+j]; 761 762 /* map edge dofs connectivity */ 763 if (jj) { 764 PetscCall(ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj)); 765 for (i=0;i<ne;i++) { 766 PetscInt e = idxs[i]; 767 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 768 } 769 } 770 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 771 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER)); 772 if (rest) { 773 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 774 } 775 if (free) PetscCall(PetscFree2(iiu,jju)); 776 PetscCall(PetscBTDestroy(&btf)); 777 } else { 778 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER)); 779 } 780 781 /* Analyze interface for edge dofs */ 782 PetscCall(PCBDDCAnalyzeInterface(pc)); 783 pcbddc->mat_graph->twodim = PETSC_FALSE; 784 785 /* Get coarse edges in the edge space */ 786 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 787 PetscCall(MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 788 789 if (fl2g) { 790 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 791 PetscCall(PetscMalloc1(nee,&eedges)); 792 for (i=0;i<nee;i++) { 793 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 794 } 795 } else { 796 eedges = alleedges; 797 primals = allprimals; 798 } 799 800 /* Mark fine edge dofs with their coarse edge id */ 801 PetscCall(PetscArrayzero(marks,ne)); 802 PetscCall(ISGetLocalSize(primals,&cum)); 803 PetscCall(ISGetIndices(primals,&idxs)); 804 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 805 PetscCall(ISRestoreIndices(primals,&idxs)); 806 if (print) { 807 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs")); 808 PetscCall(ISView(primals,NULL)); 809 } 810 811 maxsize = 0; 812 for (i=0;i<nee;i++) { 813 PetscInt size,mark = i+1; 814 815 PetscCall(ISGetLocalSize(eedges[i],&size)); 816 PetscCall(ISGetIndices(eedges[i],&idxs)); 817 for (j=0;j<size;j++) marks[idxs[j]] = mark; 818 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 819 maxsize = PetscMax(maxsize,size); 820 } 821 822 /* Find coarse edge endpoints */ 823 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 824 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 825 for (i=0;i<nee;i++) { 826 PetscInt mark = i+1,size; 827 828 PetscCall(ISGetLocalSize(eedges[i],&size)); 829 if (!size && nedfieldlocal) continue; 830 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 831 PetscCall(ISGetIndices(eedges[i],&idxs)); 832 if (print) { 833 PetscCall(PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n",i)); 834 PetscCall(ISView(eedges[i],NULL)); 835 } 836 for (j=0;j<size;j++) { 837 PetscInt k, ee = idxs[j]; 838 if (print) PetscPrintf(PETSC_COMM_SELF," idx %" PetscInt_FMT "\n",ee); 839 for (k=ii[ee];k<ii[ee+1];k++) { 840 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %" PetscInt_FMT "\n",jj[k]); 841 if (PetscBTLookup(btv,jj[k])) { 842 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %" PetscInt_FMT "\n",jj[k]); 843 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 844 PetscInt k2; 845 PetscBool corner = PETSC_FALSE; 846 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 847 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,(int)!!PetscBTLookup(btb,jjt[k2])); 848 /* it's a corner if either is connected with an edge dof belonging to a different cc or 849 if the edge dof lie on the natural part of the boundary */ 850 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 851 corner = PETSC_TRUE; 852 break; 853 } 854 } 855 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 856 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %" PetscInt_FMT "\n",jj[k]); 857 PetscCall(PetscBTSet(btv,jj[k])); 858 } else { 859 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 860 } 861 } 862 } 863 } 864 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 865 } 866 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 867 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 868 PetscCall(PetscBTDestroy(&btb)); 869 870 /* Reset marked primal dofs */ 871 PetscCall(ISGetLocalSize(primals,&cum)); 872 PetscCall(ISGetIndices(primals,&idxs)); 873 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 874 PetscCall(ISRestoreIndices(primals,&idxs)); 875 876 /* Now use the initial lG */ 877 PetscCall(MatDestroy(&lG)); 878 PetscCall(MatDestroy(&lGt)); 879 lG = lGinit; 880 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 881 882 /* Compute extended cols indices */ 883 PetscCall(PetscBTCreate(nv,&btvc)); 884 PetscCall(PetscBTCreate(nee,&bter)); 885 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 886 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG,&i)); 887 i *= maxsize; 888 PetscCall(PetscCalloc1(nee,&extcols)); 889 PetscCall(PetscMalloc2(i,&extrow,i,&gidxs)); 890 eerr = PETSC_FALSE; 891 for (i=0;i<nee;i++) { 892 PetscInt size,found = 0; 893 894 cum = 0; 895 PetscCall(ISGetLocalSize(eedges[i],&size)); 896 if (!size && nedfieldlocal) continue; 897 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 898 PetscCall(ISGetIndices(eedges[i],&idxs)); 899 PetscCall(PetscBTMemzero(nv,btvc)); 900 for (j=0;j<size;j++) { 901 PetscInt k,ee = idxs[j]; 902 for (k=ii[ee];k<ii[ee+1];k++) { 903 PetscInt vv = jj[k]; 904 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 905 else if (!PetscBTLookupSet(btvc,vv)) found++; 906 } 907 } 908 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 909 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 910 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 911 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 912 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 913 /* it may happen that endpoints are not defined at this point 914 if it is the case, mark this edge for a second pass */ 915 if (cum != size -1 || found != 2) { 916 PetscCall(PetscBTSet(bter,i)); 917 if (print) { 918 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge")); 919 PetscCall(ISView(eedges[i],NULL)); 920 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol")); 921 PetscCall(ISView(extcols[i],NULL)); 922 } 923 eerr = PETSC_TRUE; 924 } 925 } 926 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 927 PetscCall(MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm)); 928 if (done) { 929 PetscInt *newprimals; 930 931 PetscCall(PetscMalloc1(ne,&newprimals)); 932 PetscCall(ISGetLocalSize(primals,&cum)); 933 PetscCall(ISGetIndices(primals,&idxs)); 934 PetscCall(PetscArraycpy(newprimals,idxs,cum)); 935 PetscCall(ISRestoreIndices(primals,&idxs)); 936 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 937 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %s)\n",PetscBools[eerr]); 938 for (i=0;i<nee;i++) { 939 PetscBool has_candidates = PETSC_FALSE; 940 if (PetscBTLookup(bter,i)) { 941 PetscInt size,mark = i+1; 942 943 PetscCall(ISGetLocalSize(eedges[i],&size)); 944 PetscCall(ISGetIndices(eedges[i],&idxs)); 945 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 946 for (j=0;j<size;j++) { 947 PetscInt k,ee = idxs[j]; 948 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n",ee,ii[ee],ii[ee+1]); 949 for (k=ii[ee];k<ii[ee+1];k++) { 950 /* set all candidates located on the edge as corners */ 951 if (PetscBTLookup(btvcand,jj[k])) { 952 PetscInt k2,vv = jj[k]; 953 has_candidates = PETSC_TRUE; 954 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %" PetscInt_FMT "\n",vv); 955 PetscCall(PetscBTSet(btv,vv)); 956 /* set all edge dofs connected to candidate as primals */ 957 for (k2=iit[vv];k2<iit[vv+1];k2++) { 958 if (marks[jjt[k2]] == mark) { 959 PetscInt k3,ee2 = jjt[k2]; 960 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %" PetscInt_FMT "\n",ee2); 961 newprimals[cum++] = ee2; 962 /* finally set the new corners */ 963 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 964 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %" PetscInt_FMT "\n",jj[k3]); 965 PetscCall(PetscBTSet(btv,jj[k3])); 966 } 967 } 968 } 969 } else { 970 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %" PetscInt_FMT "\n",jj[k]); 971 } 972 } 973 } 974 if (!has_candidates) { /* circular edge */ 975 PetscInt k, ee = idxs[0],*tmarks; 976 977 PetscCall(PetscCalloc1(ne,&tmarks)); 978 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %" PetscInt_FMT "\n",i); 979 for (k=ii[ee];k<ii[ee+1];k++) { 980 PetscInt k2; 981 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %" PetscInt_FMT "\n",jj[k]); 982 PetscCall(PetscBTSet(btv,jj[k])); 983 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 984 } 985 for (j=0;j<size;j++) { 986 if (tmarks[idxs[j]] > 1) { 987 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %" PetscInt_FMT "\n",idxs[j]); 988 newprimals[cum++] = idxs[j]; 989 } 990 } 991 PetscCall(PetscFree(tmarks)); 992 } 993 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 994 } 995 PetscCall(ISDestroy(&extcols[i])); 996 } 997 PetscCall(PetscFree(extcols)); 998 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 999 PetscCall(PetscSortRemoveDupsInt(&cum,newprimals)); 1000 if (fl2g) { 1001 PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals)); 1002 PetscCall(ISDestroy(&primals)); 1003 for (i=0;i<nee;i++) { 1004 PetscCall(ISDestroy(&eedges[i])); 1005 } 1006 PetscCall(PetscFree(eedges)); 1007 } 1008 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1009 PetscCall(ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals)); 1010 PetscCall(PetscFree(newprimals)); 1011 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 1012 PetscCall(ISDestroy(&primals)); 1013 PetscCall(PCBDDCAnalyzeInterface(pc)); 1014 pcbddc->mat_graph->twodim = PETSC_FALSE; 1015 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1016 if (fl2g) { 1017 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 1018 PetscCall(PetscMalloc1(nee,&eedges)); 1019 for (i=0;i<nee;i++) { 1020 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 1021 } 1022 } else { 1023 eedges = alleedges; 1024 primals = allprimals; 1025 } 1026 PetscCall(PetscCalloc1(nee,&extcols)); 1027 1028 /* Mark again */ 1029 PetscCall(PetscArrayzero(marks,ne)); 1030 for (i=0;i<nee;i++) { 1031 PetscInt size,mark = i+1; 1032 1033 PetscCall(ISGetLocalSize(eedges[i],&size)); 1034 PetscCall(ISGetIndices(eedges[i],&idxs)); 1035 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1036 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1037 } 1038 if (print) { 1039 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass")); 1040 PetscCall(ISView(primals,NULL)); 1041 } 1042 1043 /* Recompute extended cols */ 1044 eerr = PETSC_FALSE; 1045 for (i=0;i<nee;i++) { 1046 PetscInt size; 1047 1048 cum = 0; 1049 PetscCall(ISGetLocalSize(eedges[i],&size)); 1050 if (!size && nedfieldlocal) continue; 1051 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 1052 PetscCall(ISGetIndices(eedges[i],&idxs)); 1053 for (j=0;j<size;j++) { 1054 PetscInt k,ee = idxs[j]; 1055 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1056 } 1057 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1058 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 1059 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 1060 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 1061 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 1062 if (cum != size -1) { 1063 if (print) { 1064 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass")); 1065 PetscCall(ISView(eedges[i],NULL)); 1066 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass")); 1067 PetscCall(ISView(extcols[i],NULL)); 1068 } 1069 eerr = PETSC_TRUE; 1070 } 1071 } 1072 } 1073 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1074 PetscCall(PetscFree2(extrow,gidxs)); 1075 PetscCall(PetscBTDestroy(&bter)); 1076 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF)); 1077 /* an error should not occur at this point */ 1078 PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1079 1080 /* Check the number of endpoints */ 1081 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1082 PetscCall(PetscMalloc1(2*nee,&corners)); 1083 PetscCall(PetscMalloc1(nee,&cedges)); 1084 for (i=0;i<nee;i++) { 1085 PetscInt size, found = 0, gc[2]; 1086 1087 /* init with defaults */ 1088 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1089 PetscCall(ISGetLocalSize(eedges[i],&size)); 1090 if (!size && nedfieldlocal) continue; 1091 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 1092 PetscCall(ISGetIndices(eedges[i],&idxs)); 1093 PetscCall(PetscBTMemzero(nv,btvc)); 1094 for (j=0;j<size;j++) { 1095 PetscInt k,ee = idxs[j]; 1096 for (k=ii[ee];k<ii[ee+1];k++) { 1097 PetscInt vv = jj[k]; 1098 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1099 PetscCheck(found != 2,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %" PetscInt_FMT,i); 1100 corners[i*2+found++] = vv; 1101 } 1102 } 1103 } 1104 if (found != 2) { 1105 PetscInt e; 1106 if (fl2g) { 1107 PetscCall(ISLocalToGlobalMappingApply(fl2g,1,idxs,&e)); 1108 } else { 1109 e = idxs[0]; 1110 } 1111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")",found,i,e,idxs[0]); 1112 } 1113 1114 /* get primal dof index on this coarse edge */ 1115 PetscCall(ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc)); 1116 if (gc[0] > gc[1]) { 1117 PetscInt swap = corners[2*i]; 1118 corners[2*i] = corners[2*i+1]; 1119 corners[2*i+1] = swap; 1120 } 1121 cedges[i] = idxs[size-1]; 1122 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1123 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1124 } 1125 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1126 PetscCall(PetscBTDestroy(&btvc)); 1127 1128 if (PetscDefined(USE_DEBUG)) { 1129 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1130 not interfere with neighbouring coarse edges */ 1131 PetscCall(PetscMalloc1(nee+1,&emarks)); 1132 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1133 for (i=0;i<nv;i++) { 1134 PetscInt emax = 0,eemax = 0; 1135 1136 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1137 PetscCall(PetscArrayzero(emarks,nee+1)); 1138 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1139 for (j=1;j<nee+1;j++) { 1140 if (emax < emarks[j]) { 1141 emax = emarks[j]; 1142 eemax = j; 1143 } 1144 } 1145 /* not relevant for edges */ 1146 if (!eemax) continue; 1147 1148 for (j=ii[i];j<ii[i+1];j++) { 1149 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1150 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT,marks[jj[j]]-1,eemax,i,jj[j]); 1151 } 1152 } 1153 } 1154 PetscCall(PetscFree(emarks)); 1155 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1156 } 1157 1158 /* Compute extended rows indices for edge blocks of the change of basis */ 1159 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1160 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt,&extmem)); 1161 extmem *= maxsize; 1162 PetscCall(PetscMalloc1(extmem*nee,&extrow)); 1163 PetscCall(PetscMalloc1(nee,&extrows)); 1164 PetscCall(PetscCalloc1(nee,&extrowcum)); 1165 for (i=0;i<nv;i++) { 1166 PetscInt mark = 0,size,start; 1167 1168 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1169 for (j=ii[i];j<ii[i+1];j++) 1170 if (marks[jj[j]] && !mark) 1171 mark = marks[jj[j]]; 1172 1173 /* not relevant */ 1174 if (!mark) continue; 1175 1176 /* import extended row */ 1177 mark--; 1178 start = mark*extmem+extrowcum[mark]; 1179 size = ii[i+1]-ii[i]; 1180 PetscCheck(extrowcum[mark] + size <= extmem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT,extrowcum[mark] + size,extmem); 1181 PetscCall(PetscArraycpy(extrow+start,jj+ii[i],size)); 1182 extrowcum[mark] += size; 1183 } 1184 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1185 PetscCall(MatDestroy(&lGt)); 1186 PetscCall(PetscFree(marks)); 1187 1188 /* Compress extrows */ 1189 cum = 0; 1190 for (i=0;i<nee;i++) { 1191 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1192 PetscCall(PetscSortRemoveDupsInt(&size,start)); 1193 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i])); 1194 cum = PetscMax(cum,size); 1195 } 1196 PetscCall(PetscFree(extrowcum)); 1197 PetscCall(PetscBTDestroy(&btv)); 1198 PetscCall(PetscBTDestroy(&btvcand)); 1199 1200 /* Workspace for lapack inner calls and VecSetValues */ 1201 PetscCall(PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork)); 1202 1203 /* Create change of basis matrix (preallocation can be improved) */ 1204 PetscCall(MatCreate(comm,&T)); 1205 PetscCall(MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,pc->pmat->rmap->N,pc->pmat->rmap->N)); 1206 PetscCall(MatSetType(T,MATAIJ)); 1207 PetscCall(MatSeqAIJSetPreallocation(T,10,NULL)); 1208 PetscCall(MatMPIAIJSetPreallocation(T,10,NULL,10,NULL)); 1209 PetscCall(MatSetLocalToGlobalMapping(T,al2g,al2g)); 1210 PetscCall(MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 1211 PetscCall(MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE)); 1212 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1213 1214 /* Defaults to identity */ 1215 PetscCall(MatCreateVecs(pc->pmat,&tvec,NULL)); 1216 PetscCall(VecSet(tvec,1.0)); 1217 PetscCall(MatDiagonalSet(T,tvec,INSERT_VALUES)); 1218 PetscCall(VecDestroy(&tvec)); 1219 1220 /* Create discrete gradient for the coarser level if needed */ 1221 PetscCall(MatDestroy(&pcbddc->nedcG)); 1222 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1223 if (pcbddc->current_level < pcbddc->max_levels) { 1224 ISLocalToGlobalMapping cel2g,cvl2g; 1225 IS wis,gwis; 1226 PetscInt cnv,cne; 1227 1228 PetscCall(ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis)); 1229 if (fl2g) { 1230 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal)); 1231 } else { 1232 PetscCall(PetscObjectReference((PetscObject)wis)); 1233 pcbddc->nedclocal = wis; 1234 } 1235 PetscCall(ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis)); 1236 PetscCall(ISDestroy(&wis)); 1237 PetscCall(ISRenumber(gwis,NULL,&cne,&wis)); 1238 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cel2g)); 1239 PetscCall(ISDestroy(&wis)); 1240 PetscCall(ISDestroy(&gwis)); 1241 1242 PetscCall(ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis)); 1243 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis)); 1244 PetscCall(ISDestroy(&wis)); 1245 PetscCall(ISRenumber(gwis,NULL,&cnv,&wis)); 1246 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cvl2g)); 1247 PetscCall(ISDestroy(&wis)); 1248 PetscCall(ISDestroy(&gwis)); 1249 1250 PetscCall(MatCreate(comm,&pcbddc->nedcG)); 1251 PetscCall(MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv)); 1252 PetscCall(MatSetType(pcbddc->nedcG,MATAIJ)); 1253 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL)); 1254 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL)); 1255 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g)); 1256 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1257 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1258 } 1259 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1260 1261 #if defined(PRINT_GDET) 1262 inc = 0; 1263 lev = pcbddc->current_level; 1264 #endif 1265 1266 /* Insert values in the change of basis matrix */ 1267 for (i=0;i<nee;i++) { 1268 Mat Gins = NULL, GKins = NULL; 1269 IS cornersis = NULL; 1270 PetscScalar cvals[2]; 1271 1272 if (pcbddc->nedcG) { 1273 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis)); 1274 } 1275 PetscCall(PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork)); 1276 if (Gins && GKins) { 1277 const PetscScalar *data; 1278 const PetscInt *rows,*cols; 1279 PetscInt nrh,nch,nrc,ncc; 1280 1281 PetscCall(ISGetIndices(eedges[i],&cols)); 1282 /* H1 */ 1283 PetscCall(ISGetIndices(extrows[i],&rows)); 1284 PetscCall(MatGetSize(Gins,&nrh,&nch)); 1285 PetscCall(MatDenseGetArrayRead(Gins,&data)); 1286 PetscCall(MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES)); 1287 PetscCall(MatDenseRestoreArrayRead(Gins,&data)); 1288 PetscCall(ISRestoreIndices(extrows[i],&rows)); 1289 /* complement */ 1290 PetscCall(MatGetSize(GKins,&nrc,&ncc)); 1291 PetscCheck(ncc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %" PetscInt_FMT,i); 1292 PetscCheck(ncc + nch == nrc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT,ncc,nch,nrc,i); 1293 PetscCheck(ncc == 1 || !pcbddc->nedcG,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT,i,ncc); 1294 PetscCall(MatDenseGetArrayRead(GKins,&data)); 1295 PetscCall(MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES)); 1296 PetscCall(MatDenseRestoreArrayRead(GKins,&data)); 1297 1298 /* coarse discrete gradient */ 1299 if (pcbddc->nedcG) { 1300 PetscInt cols[2]; 1301 1302 cols[0] = 2*i; 1303 cols[1] = 2*i+1; 1304 PetscCall(MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES)); 1305 } 1306 PetscCall(ISRestoreIndices(eedges[i],&cols)); 1307 } 1308 PetscCall(ISDestroy(&extrows[i])); 1309 PetscCall(ISDestroy(&extcols[i])); 1310 PetscCall(ISDestroy(&cornersis)); 1311 PetscCall(MatDestroy(&Gins)); 1312 PetscCall(MatDestroy(&GKins)); 1313 } 1314 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1315 1316 /* Start assembling */ 1317 PetscCall(MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY)); 1318 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1319 1320 /* Free */ 1321 if (fl2g) { 1322 PetscCall(ISDestroy(&primals)); 1323 for (i=0;i<nee;i++) { 1324 PetscCall(ISDestroy(&eedges[i])); 1325 } 1326 PetscCall(PetscFree(eedges)); 1327 } 1328 1329 /* hack mat_graph with primal dofs on the coarse edges */ 1330 { 1331 PCBDDCGraph graph = pcbddc->mat_graph; 1332 PetscInt *oqueue = graph->queue; 1333 PetscInt *ocptr = graph->cptr; 1334 PetscInt ncc,*idxs; 1335 1336 /* find first primal edge */ 1337 if (pcbddc->nedclocal) { 1338 PetscCall(ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1339 } else { 1340 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges)); 1341 idxs = cedges; 1342 } 1343 cum = 0; 1344 while (cum < nee && cedges[cum] < 0) cum++; 1345 1346 /* adapt connected components */ 1347 PetscCall(PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue)); 1348 graph->cptr[0] = 0; 1349 for (i=0,ncc=0;i<graph->ncc;i++) { 1350 PetscInt lc = ocptr[i+1]-ocptr[i]; 1351 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1352 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1353 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1354 ncc++; 1355 lc--; 1356 cum++; 1357 while (cum < nee && cedges[cum] < 0) cum++; 1358 } 1359 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1360 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1361 ncc++; 1362 } 1363 graph->ncc = ncc; 1364 if (pcbddc->nedclocal) { 1365 PetscCall(ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1366 } 1367 PetscCall(PetscFree2(ocptr,oqueue)); 1368 } 1369 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1370 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1371 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1372 PetscCall(MatDestroy(&conn)); 1373 1374 PetscCall(ISDestroy(&nedfieldlocal)); 1375 PetscCall(PetscFree(extrow)); 1376 PetscCall(PetscFree2(work,rwork)); 1377 PetscCall(PetscFree(corners)); 1378 PetscCall(PetscFree(cedges)); 1379 PetscCall(PetscFree(extrows)); 1380 PetscCall(PetscFree(extcols)); 1381 PetscCall(MatDestroy(&lG)); 1382 1383 /* Complete assembling */ 1384 PetscCall(MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY)); 1385 if (pcbddc->nedcG) { 1386 PetscCall(MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1387 #if 0 1388 PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1389 PetscCall(MatView(pcbddc->nedcG,NULL)); 1390 #endif 1391 } 1392 1393 /* set change of basis */ 1394 PetscCall(PCBDDCSetChangeOfBasisMat(pc,T,singular)); 1395 PetscCall(MatDestroy(&T)); 1396 1397 PetscFunctionReturn(0); 1398 } 1399 1400 /* the near-null space of BDDC carries information on quadrature weights, 1401 and these can be collinear -> so cheat with MatNullSpaceCreate 1402 and create a suitable set of basis vectors first */ 1403 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1404 { 1405 PetscInt i; 1406 1407 PetscFunctionBegin; 1408 for (i=0;i<nvecs;i++) { 1409 PetscInt first,last; 1410 1411 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1412 PetscCheck(last-first >= 2*nvecs || !has_const,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1413 if (i>=first && i < last) { 1414 PetscScalar *data; 1415 PetscCall(VecGetArray(quad_vecs[i],&data)); 1416 if (!has_const) { 1417 data[i-first] = 1.; 1418 } else { 1419 data[2*i-first] = 1./PetscSqrtReal(2.); 1420 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1421 } 1422 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1423 } 1424 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1425 } 1426 PetscCall(MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp)); 1427 for (i=0;i<nvecs;i++) { /* reset vectors */ 1428 PetscInt first,last; 1429 PetscCall(VecLockReadPop(quad_vecs[i])); 1430 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1431 if (i>=first && i < last) { 1432 PetscScalar *data; 1433 PetscCall(VecGetArray(quad_vecs[i],&data)); 1434 if (!has_const) { 1435 data[i-first] = 0.; 1436 } else { 1437 data[2*i-first] = 0.; 1438 data[2*i-first+1] = 0.; 1439 } 1440 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1441 } 1442 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1443 PetscCall(VecLockReadPush(quad_vecs[i])); 1444 } 1445 PetscFunctionReturn(0); 1446 } 1447 1448 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1449 { 1450 Mat loc_divudotp; 1451 Vec p,v,vins,quad_vec,*quad_vecs; 1452 ISLocalToGlobalMapping map; 1453 PetscScalar *vals; 1454 const PetscScalar *array; 1455 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1456 PetscInt n_neigh,*neigh,*n_shared,**shared; 1457 PetscMPIInt rank; 1458 1459 PetscFunctionBegin; 1460 PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1461 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1462 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A))); 1463 if (!maxneighs) { 1464 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1465 *nnsp = NULL; 1466 PetscFunctionReturn(0); 1467 } 1468 maxsize = 0; 1469 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1470 PetscCall(PetscMalloc2(maxsize,&gidxs,maxsize,&vals)); 1471 /* create vectors to hold quadrature weights */ 1472 PetscCall(MatCreateVecs(A,&quad_vec,NULL)); 1473 if (!transpose) { 1474 PetscCall(MatISGetLocalToGlobalMapping(A,&map,NULL)); 1475 } else { 1476 PetscCall(MatISGetLocalToGlobalMapping(A,NULL,&map)); 1477 } 1478 PetscCall(VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs)); 1479 PetscCall(VecDestroy(&quad_vec)); 1480 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp)); 1481 for (i=0;i<maxneighs;i++) { 1482 PetscCall(VecLockReadPop(quad_vecs[i])); 1483 } 1484 1485 /* compute local quad vec */ 1486 PetscCall(MatISGetLocalMat(divudotp,&loc_divudotp)); 1487 if (!transpose) { 1488 PetscCall(MatCreateVecs(loc_divudotp,&v,&p)); 1489 } else { 1490 PetscCall(MatCreateVecs(loc_divudotp,&p,&v)); 1491 } 1492 PetscCall(VecSet(p,1.)); 1493 if (!transpose) { 1494 PetscCall(MatMultTranspose(loc_divudotp,p,v)); 1495 } else { 1496 PetscCall(MatMult(loc_divudotp,p,v)); 1497 } 1498 if (vl2l) { 1499 Mat lA; 1500 VecScatter sc; 1501 1502 PetscCall(MatISGetLocalMat(A,&lA)); 1503 PetscCall(MatCreateVecs(lA,&vins,NULL)); 1504 PetscCall(VecScatterCreate(v,NULL,vins,vl2l,&sc)); 1505 PetscCall(VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1506 PetscCall(VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1507 PetscCall(VecScatterDestroy(&sc)); 1508 } else { 1509 vins = v; 1510 } 1511 PetscCall(VecGetArrayRead(vins,&array)); 1512 PetscCall(VecDestroy(&p)); 1513 1514 /* insert in global quadrature vecs */ 1515 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 1516 for (i=1;i<n_neigh;i++) { 1517 const PetscInt *idxs; 1518 PetscInt idx,nn,j; 1519 1520 idxs = shared[i]; 1521 nn = n_shared[i]; 1522 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1523 PetscCall(PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx)); 1524 idx = -(idx+1); 1525 PetscCheck(idx >= 0 && idx < maxneighs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")",idx,maxneighs); 1526 PetscCall(ISLocalToGlobalMappingApply(map,nn,idxs,gidxs)); 1527 PetscCall(VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES)); 1528 } 1529 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1530 PetscCall(VecRestoreArrayRead(vins,&array)); 1531 if (vl2l) { 1532 PetscCall(VecDestroy(&vins)); 1533 } 1534 PetscCall(VecDestroy(&v)); 1535 PetscCall(PetscFree2(gidxs,vals)); 1536 1537 /* assemble near null space */ 1538 for (i=0;i<maxneighs;i++) { 1539 PetscCall(VecAssemblyBegin(quad_vecs[i])); 1540 } 1541 for (i=0;i<maxneighs;i++) { 1542 PetscCall(VecAssemblyEnd(quad_vecs[i])); 1543 PetscCall(VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view")); 1544 PetscCall(VecLockReadPush(quad_vecs[i])); 1545 } 1546 PetscCall(VecDestroyVecs(maxneighs,&quad_vecs)); 1547 PetscFunctionReturn(0); 1548 } 1549 1550 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1551 { 1552 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1553 1554 PetscFunctionBegin; 1555 if (primalv) { 1556 if (pcbddc->user_primal_vertices_local) { 1557 IS list[2], newp; 1558 1559 list[0] = primalv; 1560 list[1] = pcbddc->user_primal_vertices_local; 1561 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp)); 1562 PetscCall(ISSortRemoveDups(newp)); 1563 PetscCall(ISDestroy(&list[1])); 1564 pcbddc->user_primal_vertices_local = newp; 1565 } else { 1566 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primalv)); 1567 } 1568 } 1569 PetscFunctionReturn(0); 1570 } 1571 1572 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1573 { 1574 PetscInt f, *comp = (PetscInt *)ctx; 1575 1576 PetscFunctionBegin; 1577 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1578 PetscFunctionReturn(0); 1579 } 1580 1581 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1582 { 1583 Vec local,global; 1584 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1585 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1586 PetscBool monolithic = PETSC_FALSE; 1587 1588 PetscFunctionBegin; 1589 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC"); 1590 PetscCall(PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL)); 1591 PetscOptionsEnd(); 1592 /* need to convert from global to local topology information and remove references to information in global ordering */ 1593 PetscCall(MatCreateVecs(pc->pmat,&global,NULL)); 1594 PetscCall(MatCreateVecs(matis->A,&local,NULL)); 1595 PetscCall(VecBindToCPU(global,PETSC_TRUE)); 1596 PetscCall(VecBindToCPU(local,PETSC_TRUE)); 1597 if (monolithic) { /* just get block size to properly compute vertices */ 1598 if (pcbddc->vertex_size == 1) { 1599 PetscCall(MatGetBlockSize(pc->pmat,&pcbddc->vertex_size)); 1600 } 1601 goto boundary; 1602 } 1603 1604 if (pcbddc->user_provided_isfordofs) { 1605 if (pcbddc->n_ISForDofs) { 1606 PetscInt i; 1607 1608 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal)); 1609 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1610 PetscInt bs; 1611 1612 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i])); 1613 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i],&bs)); 1614 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1615 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1616 } 1617 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1618 pcbddc->n_ISForDofs = 0; 1619 PetscCall(PetscFree(pcbddc->ISForDofs)); 1620 } 1621 } else { 1622 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1623 DM dm; 1624 1625 PetscCall(MatGetDM(pc->pmat, &dm)); 1626 if (!dm) { 1627 PetscCall(PCGetDM(pc, &dm)); 1628 } 1629 if (dm) { 1630 IS *fields; 1631 PetscInt nf,i; 1632 1633 PetscCall(DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL)); 1634 PetscCall(PetscMalloc1(nf,&pcbddc->ISForDofsLocal)); 1635 for (i=0;i<nf;i++) { 1636 PetscInt bs; 1637 1638 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i])); 1639 PetscCall(ISGetBlockSize(fields[i],&bs)); 1640 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1641 PetscCall(ISDestroy(&fields[i])); 1642 } 1643 PetscCall(PetscFree(fields)); 1644 pcbddc->n_ISForDofsLocal = nf; 1645 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1646 PetscContainer c; 1647 1648 PetscCall(PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c)); 1649 if (c) { 1650 MatISLocalFields lf; 1651 PetscCall(PetscContainerGetPointer(c,(void**)&lf)); 1652 PetscCall(PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf)); 1653 } else { /* fallback, create the default fields if bs > 1 */ 1654 PetscInt i, n = matis->A->rmap->n; 1655 PetscCall(MatGetBlockSize(pc->pmat,&i)); 1656 if (i > 1) { 1657 pcbddc->n_ISForDofsLocal = i; 1658 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal)); 1659 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1660 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i])); 1661 } 1662 } 1663 } 1664 } 1665 } else { 1666 PetscInt i; 1667 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1668 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i])); 1669 } 1670 } 1671 } 1672 1673 boundary: 1674 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1675 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal)); 1676 } else if (pcbddc->DirichletBoundariesLocal) { 1677 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal)); 1678 } 1679 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1680 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal)); 1681 } else if (pcbddc->NeumannBoundariesLocal) { 1682 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal)); 1683 } 1684 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1685 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local)); 1686 } 1687 PetscCall(VecDestroy(&global)); 1688 PetscCall(VecDestroy(&local)); 1689 /* detect local disconnected subdomains if requested (use matis->A) */ 1690 if (pcbddc->detect_disconnected) { 1691 IS primalv = NULL; 1692 PetscInt i; 1693 PetscBool filter = pcbddc->detect_disconnected_filter; 1694 1695 for (i=0;i<pcbddc->n_local_subs;i++) { 1696 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1697 } 1698 PetscCall(PetscFree(pcbddc->local_subs)); 1699 PetscCall(PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv)); 1700 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,primalv)); 1701 PetscCall(ISDestroy(&primalv)); 1702 } 1703 /* early stage corner detection */ 1704 { 1705 DM dm; 1706 1707 PetscCall(MatGetDM(pc->pmat,&dm)); 1708 if (!dm) { 1709 PetscCall(PCGetDM(pc,&dm)); 1710 } 1711 if (dm) { 1712 PetscBool isda; 1713 1714 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda)); 1715 if (isda) { 1716 ISLocalToGlobalMapping l2l; 1717 IS corners; 1718 Mat lA; 1719 PetscBool gl,lo; 1720 1721 { 1722 Vec cvec; 1723 const PetscScalar *coords; 1724 PetscInt dof,n,cdim; 1725 PetscBool memc = PETSC_TRUE; 1726 1727 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1728 PetscCall(DMGetCoordinates(dm,&cvec)); 1729 PetscCall(VecGetLocalSize(cvec,&n)); 1730 PetscCall(VecGetBlockSize(cvec,&cdim)); 1731 n /= cdim; 1732 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1733 PetscCall(PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords)); 1734 PetscCall(VecGetArrayRead(cvec,&coords)); 1735 #if defined(PETSC_USE_COMPLEX) 1736 memc = PETSC_FALSE; 1737 #endif 1738 if (dof != 1) memc = PETSC_FALSE; 1739 if (memc) { 1740 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof)); 1741 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1742 PetscReal *bcoords = pcbddc->mat_graph->coords; 1743 PetscInt i, b, d; 1744 1745 for (i=0;i<n;i++) { 1746 for (b=0;b<dof;b++) { 1747 for (d=0;d<cdim;d++) { 1748 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1749 } 1750 } 1751 } 1752 } 1753 PetscCall(VecRestoreArrayRead(cvec,&coords)); 1754 pcbddc->mat_graph->cdim = cdim; 1755 pcbddc->mat_graph->cnloc = dof*n; 1756 pcbddc->mat_graph->cloc = PETSC_FALSE; 1757 } 1758 PetscCall(DMDAGetSubdomainCornersIS(dm,&corners)); 1759 PetscCall(MatISGetLocalMat(pc->pmat,&lA)); 1760 PetscCall(MatGetLocalToGlobalMapping(lA,&l2l,NULL)); 1761 PetscCall(MatISRestoreLocalMat(pc->pmat,&lA)); 1762 lo = (PetscBool)(l2l && corners); 1763 PetscCall(MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 1764 if (gl) { /* From PETSc's DMDA */ 1765 const PetscInt *idx; 1766 PetscInt dof,bs,*idxout,n; 1767 1768 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1769 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l,&bs)); 1770 PetscCall(ISGetLocalSize(corners,&n)); 1771 PetscCall(ISGetIndices(corners,&idx)); 1772 if (bs == dof) { 1773 PetscCall(PetscMalloc1(n,&idxout)); 1774 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout)); 1775 } else { /* the original DMDA local-to-local map have been modified */ 1776 PetscInt i,d; 1777 1778 PetscCall(PetscMalloc1(dof*n,&idxout)); 1779 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1780 PetscCall(ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout)); 1781 1782 bs = 1; 1783 n *= dof; 1784 } 1785 PetscCall(ISRestoreIndices(corners,&idx)); 1786 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1787 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners)); 1788 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,corners)); 1789 PetscCall(ISDestroy(&corners)); 1790 pcbddc->corner_selected = PETSC_TRUE; 1791 pcbddc->corner_selection = PETSC_TRUE; 1792 } 1793 if (corners) { 1794 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1795 } 1796 } 1797 } 1798 } 1799 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1800 DM dm; 1801 1802 PetscCall(MatGetDM(pc->pmat,&dm)); 1803 if (!dm) { 1804 PetscCall(PCGetDM(pc,&dm)); 1805 } 1806 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1807 Vec vcoords; 1808 PetscSection section; 1809 PetscReal *coords; 1810 PetscInt d,cdim,nl,nf,**ctxs; 1811 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1812 /* debug coordinates */ 1813 PetscViewer viewer; 1814 PetscBool flg; 1815 PetscViewerFormat format; 1816 const char *prefix; 1817 1818 PetscCall(DMGetCoordinateDim(dm,&cdim)); 1819 PetscCall(DMGetLocalSection(dm,§ion)); 1820 PetscCall(PetscSectionGetNumFields(section,&nf)); 1821 PetscCall(DMCreateGlobalVector(dm,&vcoords)); 1822 PetscCall(VecGetLocalSize(vcoords,&nl)); 1823 PetscCall(PetscMalloc1(nl*cdim,&coords)); 1824 PetscCall(PetscMalloc2(nf,&funcs,nf,&ctxs)); 1825 PetscCall(PetscMalloc1(nf,&ctxs[0])); 1826 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1827 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1828 1829 /* debug coordinates */ 1830 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 1831 PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords),((PetscObject)vcoords)->options,prefix,"-pc_bddc_coords_vec_view",&viewer,&format,&flg)); 1832 if (flg) PetscCall(PetscViewerPushFormat(viewer,format)); 1833 for (d=0;d<cdim;d++) { 1834 PetscInt i; 1835 const PetscScalar *v; 1836 char name[16]; 1837 1838 for (i=0;i<nf;i++) ctxs[i][0] = d; 1839 PetscCall(PetscSNPrintf(name,sizeof(name),"bddc_coords_%d",(int)d)); 1840 PetscCall(PetscObjectSetName((PetscObject)vcoords,name)); 1841 PetscCall(DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords)); 1842 if (flg) PetscCall(VecView(vcoords,viewer)); 1843 PetscCall(VecGetArrayRead(vcoords,&v)); 1844 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1845 PetscCall(VecRestoreArrayRead(vcoords,&v)); 1846 } 1847 PetscCall(VecDestroy(&vcoords)); 1848 PetscCall(PCSetCoordinates(pc,cdim,nl,coords)); 1849 PetscCall(PetscFree(coords)); 1850 PetscCall(PetscFree(ctxs[0])); 1851 PetscCall(PetscFree2(funcs,ctxs)); 1852 if (flg) { 1853 PetscCall(PetscViewerPopFormat(viewer)); 1854 PetscCall(PetscViewerDestroy(&viewer)); 1855 } 1856 } 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1862 { 1863 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1864 IS nis; 1865 const PetscInt *idxs; 1866 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1867 1868 PetscFunctionBegin; 1869 PetscCheck(mop == MPI_LAND || mop == MPI_LOR,PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1870 if (mop == MPI_LAND) { 1871 /* init rootdata with true */ 1872 for (i=0;i<pc->pmat->rmap->n;i++) matis->sf_rootdata[i] = 1; 1873 } else { 1874 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 1875 } 1876 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 1877 PetscCall(ISGetLocalSize(*is,&nd)); 1878 PetscCall(ISGetIndices(*is,&idxs)); 1879 for (i=0;i<nd;i++) 1880 if (-1 < idxs[i] && idxs[i] < n) 1881 matis->sf_leafdata[idxs[i]] = 1; 1882 PetscCall(ISRestoreIndices(*is,&idxs)); 1883 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1884 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1885 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1886 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1887 if (mop == MPI_LAND) { 1888 PetscCall(PetscMalloc1(nd,&nidxs)); 1889 } else { 1890 PetscCall(PetscMalloc1(n,&nidxs)); 1891 } 1892 for (i=0,nnd=0;i<n;i++) 1893 if (matis->sf_leafdata[i]) 1894 nidxs[nnd++] = i; 1895 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis)); 1896 PetscCall(ISDestroy(is)); 1897 *is = nis; 1898 PetscFunctionReturn(0); 1899 } 1900 1901 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1902 { 1903 PC_IS *pcis = (PC_IS*)(pc->data); 1904 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1905 1906 PetscFunctionBegin; 1907 if (!pcbddc->benign_have_null) { 1908 PetscFunctionReturn(0); 1909 } 1910 if (pcbddc->ChangeOfBasisMatrix) { 1911 Vec swap; 1912 1913 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change)); 1914 swap = pcbddc->work_change; 1915 pcbddc->work_change = r; 1916 r = swap; 1917 } 1918 PetscCall(VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1919 PetscCall(VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1920 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1921 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D)); 1922 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1923 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 1924 PetscCall(VecSet(z,0.)); 1925 PetscCall(VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1926 PetscCall(VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1927 if (pcbddc->ChangeOfBasisMatrix) { 1928 pcbddc->work_change = r; 1929 PetscCall(VecCopy(z,pcbddc->work_change)); 1930 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z)); 1931 } 1932 PetscFunctionReturn(0); 1933 } 1934 1935 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1936 { 1937 PCBDDCBenignMatMult_ctx ctx; 1938 PetscBool apply_right,apply_left,reset_x; 1939 1940 PetscFunctionBegin; 1941 PetscCall(MatShellGetContext(A,&ctx)); 1942 if (transpose) { 1943 apply_right = ctx->apply_left; 1944 apply_left = ctx->apply_right; 1945 } else { 1946 apply_right = ctx->apply_right; 1947 apply_left = ctx->apply_left; 1948 } 1949 reset_x = PETSC_FALSE; 1950 if (apply_right) { 1951 const PetscScalar *ax; 1952 PetscInt nl,i; 1953 1954 PetscCall(VecGetLocalSize(x,&nl)); 1955 PetscCall(VecGetArrayRead(x,&ax)); 1956 PetscCall(PetscArraycpy(ctx->work,ax,nl)); 1957 PetscCall(VecRestoreArrayRead(x,&ax)); 1958 for (i=0;i<ctx->benign_n;i++) { 1959 PetscScalar sum,val; 1960 const PetscInt *idxs; 1961 PetscInt nz,j; 1962 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 1963 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1964 sum = 0.; 1965 if (ctx->apply_p0) { 1966 val = ctx->work[idxs[nz-1]]; 1967 for (j=0;j<nz-1;j++) { 1968 sum += ctx->work[idxs[j]]; 1969 ctx->work[idxs[j]] += val; 1970 } 1971 } else { 1972 for (j=0;j<nz-1;j++) { 1973 sum += ctx->work[idxs[j]]; 1974 } 1975 } 1976 ctx->work[idxs[nz-1]] -= sum; 1977 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1978 } 1979 PetscCall(VecPlaceArray(x,ctx->work)); 1980 reset_x = PETSC_TRUE; 1981 } 1982 if (transpose) { 1983 PetscCall(MatMultTranspose(ctx->A,x,y)); 1984 } else { 1985 PetscCall(MatMult(ctx->A,x,y)); 1986 } 1987 if (reset_x) PetscCall(VecResetArray(x)); 1988 if (apply_left) { 1989 PetscScalar *ay; 1990 PetscInt i; 1991 1992 PetscCall(VecGetArray(y,&ay)); 1993 for (i=0;i<ctx->benign_n;i++) { 1994 PetscScalar sum,val; 1995 const PetscInt *idxs; 1996 PetscInt nz,j; 1997 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 1998 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1999 val = -ay[idxs[nz-1]]; 2000 if (ctx->apply_p0) { 2001 sum = 0.; 2002 for (j=0;j<nz-1;j++) { 2003 sum += ay[idxs[j]]; 2004 ay[idxs[j]] += val; 2005 } 2006 ay[idxs[nz-1]] += sum; 2007 } else { 2008 for (j=0;j<nz-1;j++) { 2009 ay[idxs[j]] += val; 2010 } 2011 ay[idxs[nz-1]] = 0.; 2012 } 2013 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 2014 } 2015 PetscCall(VecRestoreArray(y,&ay)); 2016 } 2017 PetscFunctionReturn(0); 2018 } 2019 2020 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2021 { 2022 PetscFunctionBegin; 2023 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE)); 2024 PetscFunctionReturn(0); 2025 } 2026 2027 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2028 { 2029 PetscFunctionBegin; 2030 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE)); 2031 PetscFunctionReturn(0); 2032 } 2033 2034 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2035 { 2036 PC_IS *pcis = (PC_IS*)pc->data; 2037 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2038 PCBDDCBenignMatMult_ctx ctx; 2039 2040 PetscFunctionBegin; 2041 if (!restore) { 2042 Mat A_IB,A_BI; 2043 PetscScalar *work; 2044 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2045 2046 PetscCheck(!pcbddc->benign_original_mat,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2047 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2048 PetscCall(PetscMalloc1(pcis->n,&work)); 2049 PetscCall(MatCreate(PETSC_COMM_SELF,&A_IB)); 2050 PetscCall(MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE)); 2051 PetscCall(MatSetType(A_IB,MATSHELL)); 2052 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private)); 2053 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2054 PetscCall(PetscNew(&ctx)); 2055 PetscCall(MatShellSetContext(A_IB,ctx)); 2056 ctx->apply_left = PETSC_TRUE; 2057 ctx->apply_right = PETSC_FALSE; 2058 ctx->apply_p0 = PETSC_FALSE; 2059 ctx->benign_n = pcbddc->benign_n; 2060 if (reuse) { 2061 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2062 ctx->free = PETSC_FALSE; 2063 } else { /* TODO: could be optimized for successive solves */ 2064 ISLocalToGlobalMapping N_to_D; 2065 PetscInt i; 2066 2067 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D)); 2068 PetscCall(PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs)); 2069 for (i=0;i<pcbddc->benign_n;i++) { 2070 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i])); 2071 } 2072 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2073 ctx->free = PETSC_TRUE; 2074 } 2075 ctx->A = pcis->A_IB; 2076 ctx->work = work; 2077 PetscCall(MatSetUp(A_IB)); 2078 PetscCall(MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY)); 2079 PetscCall(MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY)); 2080 pcis->A_IB = A_IB; 2081 2082 /* A_BI as A_IB^T */ 2083 PetscCall(MatCreateTranspose(A_IB,&A_BI)); 2084 pcbddc->benign_original_mat = pcis->A_BI; 2085 pcis->A_BI = A_BI; 2086 } else { 2087 if (!pcbddc->benign_original_mat) { 2088 PetscFunctionReturn(0); 2089 } 2090 PetscCall(MatShellGetContext(pcis->A_IB,&ctx)); 2091 PetscCall(MatDestroy(&pcis->A_IB)); 2092 pcis->A_IB = ctx->A; 2093 ctx->A = NULL; 2094 PetscCall(MatDestroy(&pcis->A_BI)); 2095 pcis->A_BI = pcbddc->benign_original_mat; 2096 pcbddc->benign_original_mat = NULL; 2097 if (ctx->free) { 2098 PetscInt i; 2099 for (i=0;i<ctx->benign_n;i++) { 2100 PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2101 } 2102 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2103 } 2104 PetscCall(PetscFree(ctx->work)); 2105 PetscCall(PetscFree(ctx)); 2106 } 2107 PetscFunctionReturn(0); 2108 } 2109 2110 /* used just in bddc debug mode */ 2111 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2112 { 2113 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2114 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2115 Mat An; 2116 2117 PetscFunctionBegin; 2118 PetscCall(MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An)); 2119 PetscCall(MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL)); 2120 if (is1) { 2121 PetscCall(MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B)); 2122 PetscCall(MatDestroy(&An)); 2123 } else { 2124 *B = An; 2125 } 2126 PetscFunctionReturn(0); 2127 } 2128 2129 /* TODO: add reuse flag */ 2130 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2131 { 2132 Mat Bt; 2133 PetscScalar *a,*bdata; 2134 const PetscInt *ii,*ij; 2135 PetscInt m,n,i,nnz,*bii,*bij; 2136 PetscBool flg_row; 2137 2138 PetscFunctionBegin; 2139 PetscCall(MatGetSize(A,&n,&m)); 2140 PetscCall(MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2141 PetscCall(MatSeqAIJGetArray(A,&a)); 2142 nnz = n; 2143 for (i=0;i<ii[n];i++) { 2144 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2145 } 2146 PetscCall(PetscMalloc1(n+1,&bii)); 2147 PetscCall(PetscMalloc1(nnz,&bij)); 2148 PetscCall(PetscMalloc1(nnz,&bdata)); 2149 nnz = 0; 2150 bii[0] = 0; 2151 for (i=0;i<n;i++) { 2152 PetscInt j; 2153 for (j=ii[i];j<ii[i+1];j++) { 2154 PetscScalar entry = a[j]; 2155 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2156 bij[nnz] = ij[j]; 2157 bdata[nnz] = entry; 2158 nnz++; 2159 } 2160 } 2161 bii[i+1] = nnz; 2162 } 2163 PetscCall(MatSeqAIJRestoreArray(A,&a)); 2164 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt)); 2165 PetscCall(MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2166 { 2167 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2168 b->free_a = PETSC_TRUE; 2169 b->free_ij = PETSC_TRUE; 2170 } 2171 if (*B == A) { 2172 PetscCall(MatDestroy(&A)); 2173 } 2174 *B = Bt; 2175 PetscFunctionReturn(0); 2176 } 2177 2178 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2179 { 2180 Mat B = NULL; 2181 DM dm; 2182 IS is_dummy,*cc_n; 2183 ISLocalToGlobalMapping l2gmap_dummy; 2184 PCBDDCGraph graph; 2185 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2186 PetscInt i,n; 2187 PetscInt *xadj,*adjncy; 2188 PetscBool isplex = PETSC_FALSE; 2189 2190 PetscFunctionBegin; 2191 if (ncc) *ncc = 0; 2192 if (cc) *cc = NULL; 2193 if (primalv) *primalv = NULL; 2194 PetscCall(PCBDDCGraphCreate(&graph)); 2195 PetscCall(MatGetDM(pc->pmat,&dm)); 2196 if (!dm) { 2197 PetscCall(PCGetDM(pc,&dm)); 2198 } 2199 if (dm) { 2200 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex)); 2201 } 2202 if (filter) isplex = PETSC_FALSE; 2203 2204 if (isplex) { /* this code has been modified from plexpartition.c */ 2205 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2206 PetscInt *adj = NULL; 2207 IS cellNumbering; 2208 const PetscInt *cellNum; 2209 PetscBool useCone, useClosure; 2210 PetscSection section; 2211 PetscSegBuffer adjBuffer; 2212 PetscSF sfPoint; 2213 2214 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2215 PetscCall(DMGetPointSF(dm, &sfPoint)); 2216 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2217 /* Build adjacency graph via a section/segbuffer */ 2218 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion)); 2219 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2220 PetscCall(PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer)); 2221 /* Always use FVM adjacency to create partitioner graph */ 2222 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2223 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2224 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2225 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2226 for (n = 0, p = pStart; p < pEnd; p++) { 2227 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2228 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2229 adjSize = PETSC_DETERMINE; 2230 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2231 for (a = 0; a < adjSize; ++a) { 2232 const PetscInt point = adj[a]; 2233 if (pStart <= point && point < pEnd) { 2234 PetscInt *PETSC_RESTRICT pBuf; 2235 PetscCall(PetscSectionAddDof(section, p, 1)); 2236 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2237 *pBuf = point; 2238 } 2239 } 2240 n++; 2241 } 2242 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2243 /* Derive CSR graph from section/segbuffer */ 2244 PetscCall(PetscSectionSetUp(section)); 2245 PetscCall(PetscSectionGetStorageSize(section, &size)); 2246 PetscCall(PetscMalloc1(n+1, &xadj)); 2247 for (idx = 0, p = pStart; p < pEnd; p++) { 2248 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2249 PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++]))); 2250 } 2251 xadj[n] = size; 2252 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2253 /* Clean up */ 2254 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2255 PetscCall(PetscSectionDestroy(§ion)); 2256 PetscCall(PetscFree(adj)); 2257 graph->xadj = xadj; 2258 graph->adjncy = adjncy; 2259 } else { 2260 Mat A; 2261 PetscBool isseqaij, flg_row; 2262 2263 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2264 if (!A->rmap->N || !A->cmap->N) { 2265 PetscCall(PCBDDCGraphDestroy(&graph)); 2266 PetscFunctionReturn(0); 2267 } 2268 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij)); 2269 if (!isseqaij && filter) { 2270 PetscBool isseqdense; 2271 2272 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense)); 2273 if (!isseqdense) { 2274 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 2275 } else { /* TODO: rectangular case and LDA */ 2276 PetscScalar *array; 2277 PetscReal chop=1.e-6; 2278 2279 PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 2280 PetscCall(MatDenseGetArray(B,&array)); 2281 PetscCall(MatGetSize(B,&n,NULL)); 2282 for (i=0;i<n;i++) { 2283 PetscInt j; 2284 for (j=i+1;j<n;j++) { 2285 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2286 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2287 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2288 } 2289 } 2290 PetscCall(MatDenseRestoreArray(B,&array)); 2291 PetscCall(MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B)); 2292 } 2293 } else { 2294 PetscCall(PetscObjectReference((PetscObject)A)); 2295 B = A; 2296 } 2297 PetscCall(MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2298 2299 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2300 if (filter) { 2301 PetscScalar *data; 2302 PetscInt j,cum; 2303 2304 PetscCall(PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered)); 2305 PetscCall(MatSeqAIJGetArray(B,&data)); 2306 cum = 0; 2307 for (i=0;i<n;i++) { 2308 PetscInt t; 2309 2310 for (j=xadj[i];j<xadj[i+1];j++) { 2311 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2312 continue; 2313 } 2314 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2315 } 2316 t = xadj_filtered[i]; 2317 xadj_filtered[i] = cum; 2318 cum += t; 2319 } 2320 PetscCall(MatSeqAIJRestoreArray(B,&data)); 2321 graph->xadj = xadj_filtered; 2322 graph->adjncy = adjncy_filtered; 2323 } else { 2324 graph->xadj = xadj; 2325 graph->adjncy = adjncy; 2326 } 2327 } 2328 /* compute local connected components using PCBDDCGraph */ 2329 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy)); 2330 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy)); 2331 PetscCall(ISDestroy(&is_dummy)); 2332 PetscCall(PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT)); 2333 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2334 PetscCall(PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL)); 2335 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2336 2337 /* partial clean up */ 2338 PetscCall(PetscFree2(xadj_filtered,adjncy_filtered)); 2339 if (B) { 2340 PetscBool flg_row; 2341 PetscCall(MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2342 PetscCall(MatDestroy(&B)); 2343 } 2344 if (isplex) { 2345 PetscCall(PetscFree(xadj)); 2346 PetscCall(PetscFree(adjncy)); 2347 } 2348 2349 /* get back data */ 2350 if (isplex) { 2351 if (ncc) *ncc = graph->ncc; 2352 if (cc || primalv) { 2353 Mat A; 2354 PetscBT btv,btvt; 2355 PetscSection subSection; 2356 PetscInt *ids,cum,cump,*cids,*pids; 2357 2358 PetscCall(DMPlexGetSubdomainSection(dm,&subSection)); 2359 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2360 PetscCall(PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids)); 2361 PetscCall(PetscBTCreate(A->rmap->n,&btv)); 2362 PetscCall(PetscBTCreate(A->rmap->n,&btvt)); 2363 2364 cids[0] = 0; 2365 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2366 PetscInt j; 2367 2368 PetscCall(PetscBTMemzero(A->rmap->n,btvt)); 2369 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2370 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2371 2372 PetscCall(DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2373 for (k = 0; k < 2*size; k += 2) { 2374 PetscInt s, pp, p = closure[k], off, dof, cdof; 2375 2376 PetscCall(PetscSectionGetConstraintDof(subSection,p,&cdof)); 2377 PetscCall(PetscSectionGetOffset(subSection,p,&off)); 2378 PetscCall(PetscSectionGetDof(subSection,p,&dof)); 2379 for (s = 0; s < dof-cdof; s++) { 2380 if (PetscBTLookupSet(btvt,off+s)) continue; 2381 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2382 else pids[cump++] = off+s; /* cross-vertex */ 2383 } 2384 PetscCall(DMPlexGetTreeParent(dm,p,&pp,NULL)); 2385 if (pp != p) { 2386 PetscCall(PetscSectionGetConstraintDof(subSection,pp,&cdof)); 2387 PetscCall(PetscSectionGetOffset(subSection,pp,&off)); 2388 PetscCall(PetscSectionGetDof(subSection,pp,&dof)); 2389 for (s = 0; s < dof-cdof; s++) { 2390 if (PetscBTLookupSet(btvt,off+s)) continue; 2391 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2392 else pids[cump++] = off+s; /* cross-vertex */ 2393 } 2394 } 2395 } 2396 PetscCall(DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2397 } 2398 cids[i+1] = cum; 2399 /* mark dofs as already assigned */ 2400 for (j = cids[i]; j < cids[i+1]; j++) { 2401 PetscCall(PetscBTSet(btv,ids[j])); 2402 } 2403 } 2404 if (cc) { 2405 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2406 for (i = 0; i < graph->ncc; i++) { 2407 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i])); 2408 } 2409 *cc = cc_n; 2410 } 2411 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv)); 2412 PetscCall(PetscFree3(ids,cids,pids)); 2413 PetscCall(PetscBTDestroy(&btv)); 2414 PetscCall(PetscBTDestroy(&btvt)); 2415 } 2416 } else { 2417 if (ncc) *ncc = graph->ncc; 2418 if (cc) { 2419 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2420 for (i=0;i<graph->ncc;i++) { 2421 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i])); 2422 } 2423 *cc = cc_n; 2424 } 2425 } 2426 /* clean up graph */ 2427 graph->xadj = NULL; 2428 graph->adjncy = NULL; 2429 PetscCall(PCBDDCGraphDestroy(&graph)); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2434 { 2435 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2436 PC_IS* pcis = (PC_IS*)(pc->data); 2437 IS dirIS = NULL; 2438 PetscInt i; 2439 2440 PetscFunctionBegin; 2441 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS)); 2442 if (zerodiag) { 2443 Mat A; 2444 Vec vec3_N; 2445 PetscScalar *vals; 2446 const PetscInt *idxs; 2447 PetscInt nz,*count; 2448 2449 /* p0 */ 2450 PetscCall(VecSet(pcis->vec1_N,0.)); 2451 PetscCall(PetscMalloc1(pcis->n,&vals)); 2452 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2453 PetscCall(ISGetIndices(zerodiag,&idxs)); 2454 for (i=0;i<nz;i++) vals[i] = 1.; 2455 PetscCall(VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES)); 2456 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2457 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2458 /* v_I */ 2459 PetscCall(VecSetRandom(pcis->vec2_N,NULL)); 2460 for (i=0;i<nz;i++) vals[i] = 0.; 2461 PetscCall(VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES)); 2462 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2463 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2464 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2465 PetscCall(VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES)); 2466 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2467 if (dirIS) { 2468 PetscInt n; 2469 2470 PetscCall(ISGetLocalSize(dirIS,&n)); 2471 PetscCall(ISGetIndices(dirIS,&idxs)); 2472 for (i=0;i<n;i++) vals[i] = 0.; 2473 PetscCall(VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES)); 2474 PetscCall(ISRestoreIndices(dirIS,&idxs)); 2475 } 2476 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2477 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2478 PetscCall(VecDuplicate(pcis->vec1_N,&vec3_N)); 2479 PetscCall(VecSet(vec3_N,0.)); 2480 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2481 PetscCall(MatMult(A,pcis->vec1_N,vec3_N)); 2482 PetscCall(VecDot(vec3_N,pcis->vec2_N,&vals[0])); 2483 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",(double)PetscAbsScalar(vals[0])); 2484 PetscCall(PetscFree(vals)); 2485 PetscCall(VecDestroy(&vec3_N)); 2486 2487 /* there should not be any pressure dofs lying on the interface */ 2488 PetscCall(PetscCalloc1(pcis->n,&count)); 2489 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2490 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2491 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2492 PetscCall(ISGetIndices(zerodiag,&idxs)); 2493 for (i=0;i<nz;i++) PetscCheck(!count[idxs[i]],PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof",idxs[i]); 2494 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2495 PetscCall(PetscFree(count)); 2496 } 2497 PetscCall(ISDestroy(&dirIS)); 2498 2499 /* check PCBDDCBenignGetOrSetP0 */ 2500 PetscCall(VecSetRandom(pcis->vec1_global,NULL)); 2501 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2502 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE)); 2503 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2504 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE)); 2505 for (i=0;i<pcbddc->benign_n;i++) { 2506 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2507 PetscCheck(val == -PetscGlobalRank-i,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g",(double)PetscRealPart(pcbddc->benign_p0[i]),i,(double)(-PetscGlobalRank-i)); 2508 } 2509 PetscFunctionReturn(0); 2510 } 2511 2512 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2513 { 2514 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2515 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2516 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2517 PetscInt nz,n,benign_n,bsp = 1; 2518 PetscInt *interior_dofs,n_interior_dofs,nneu; 2519 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2520 2521 PetscFunctionBegin; 2522 if (reuse) goto project_b0; 2523 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2524 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2525 for (n=0;n<pcbddc->benign_n;n++) { 2526 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2527 } 2528 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2529 has_null_pressures = PETSC_TRUE; 2530 have_null = PETSC_TRUE; 2531 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2532 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2533 Checks if all the pressure dofs in each subdomain have a zero diagonal 2534 If not, a change of basis on pressures is not needed 2535 since the local Schur complements are already SPD 2536 */ 2537 if (pcbddc->n_ISForDofsLocal) { 2538 IS iP = NULL; 2539 PetscInt p,*pp; 2540 PetscBool flg; 2541 2542 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp)); 2543 n = pcbddc->n_ISForDofsLocal; 2544 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC"); 2545 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg)); 2546 PetscOptionsEnd(); 2547 if (!flg) { 2548 n = 1; 2549 pp[0] = pcbddc->n_ISForDofsLocal-1; 2550 } 2551 2552 bsp = 0; 2553 for (p=0;p<n;p++) { 2554 PetscInt bs; 2555 2556 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal,PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %" PetscInt_FMT,pp[p]); 2557 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2558 bsp += bs; 2559 } 2560 PetscCall(PetscMalloc1(bsp,&bzerodiag)); 2561 bsp = 0; 2562 for (p=0;p<n;p++) { 2563 const PetscInt *idxs; 2564 PetscInt b,bs,npl,*bidxs; 2565 2566 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2567 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl)); 2568 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2569 PetscCall(PetscMalloc1(npl/bs,&bidxs)); 2570 for (b=0;b<bs;b++) { 2571 PetscInt i; 2572 2573 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2574 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp])); 2575 bsp++; 2576 } 2577 PetscCall(PetscFree(bidxs)); 2578 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2579 } 2580 PetscCall(ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures)); 2581 2582 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2583 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP)); 2584 if (iP) { 2585 IS newpressures; 2586 2587 PetscCall(ISDifference(pressures,iP,&newpressures)); 2588 PetscCall(ISDestroy(&pressures)); 2589 pressures = newpressures; 2590 } 2591 PetscCall(ISSorted(pressures,&sorted)); 2592 if (!sorted) { 2593 PetscCall(ISSort(pressures)); 2594 } 2595 PetscCall(PetscFree(pp)); 2596 } 2597 2598 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2599 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2600 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2601 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag)); 2602 PetscCall(ISSorted(zerodiag,&sorted)); 2603 if (!sorted) { 2604 PetscCall(ISSort(zerodiag)); 2605 } 2606 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2607 zerodiag_save = zerodiag; 2608 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2609 if (!nz) { 2610 if (n) have_null = PETSC_FALSE; 2611 has_null_pressures = PETSC_FALSE; 2612 PetscCall(ISDestroy(&zerodiag)); 2613 } 2614 recompute_zerodiag = PETSC_FALSE; 2615 2616 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2617 zerodiag_subs = NULL; 2618 benign_n = 0; 2619 n_interior_dofs = 0; 2620 interior_dofs = NULL; 2621 nneu = 0; 2622 if (pcbddc->NeumannBoundariesLocal) { 2623 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu)); 2624 } 2625 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2626 if (checkb) { /* need to compute interior nodes */ 2627 PetscInt n,i,j; 2628 PetscInt n_neigh,*neigh,*n_shared,**shared; 2629 PetscInt *iwork; 2630 2631 PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping,&n)); 2632 PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2633 PetscCall(PetscCalloc1(n,&iwork)); 2634 PetscCall(PetscMalloc1(n,&interior_dofs)); 2635 for (i=1;i<n_neigh;i++) 2636 for (j=0;j<n_shared[i];j++) 2637 iwork[shared[i][j]] += 1; 2638 for (i=0;i<n;i++) 2639 if (!iwork[i]) 2640 interior_dofs[n_interior_dofs++] = i; 2641 PetscCall(PetscFree(iwork)); 2642 PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2643 } 2644 if (has_null_pressures) { 2645 IS *subs; 2646 PetscInt nsubs,i,j,nl; 2647 const PetscInt *idxs; 2648 PetscScalar *array; 2649 Vec *work; 2650 2651 subs = pcbddc->local_subs; 2652 nsubs = pcbddc->n_local_subs; 2653 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2654 if (checkb) { 2655 PetscCall(VecDuplicateVecs(matis->y,2,&work)); 2656 PetscCall(ISGetLocalSize(zerodiag,&nl)); 2657 PetscCall(ISGetIndices(zerodiag,&idxs)); 2658 /* work[0] = 1_p */ 2659 PetscCall(VecSet(work[0],0.)); 2660 PetscCall(VecGetArray(work[0],&array)); 2661 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2662 PetscCall(VecRestoreArray(work[0],&array)); 2663 /* work[0] = 1_v */ 2664 PetscCall(VecSet(work[1],1.)); 2665 PetscCall(VecGetArray(work[1],&array)); 2666 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2667 PetscCall(VecRestoreArray(work[1],&array)); 2668 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2669 } 2670 2671 if (nsubs > 1 || bsp > 1) { 2672 IS *is; 2673 PetscInt b,totb; 2674 2675 totb = bsp; 2676 is = bsp > 1 ? bzerodiag : &zerodiag; 2677 nsubs = PetscMax(nsubs,1); 2678 PetscCall(PetscCalloc1(nsubs*totb,&zerodiag_subs)); 2679 for (b=0;b<totb;b++) { 2680 for (i=0;i<nsubs;i++) { 2681 ISLocalToGlobalMapping l2g; 2682 IS t_zerodiag_subs; 2683 PetscInt nl; 2684 2685 if (subs) { 2686 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i],&l2g)); 2687 } else { 2688 IS tis; 2689 2690 PetscCall(MatGetLocalSize(pcbddc->local_mat,&nl,NULL)); 2691 PetscCall(ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis)); 2692 PetscCall(ISLocalToGlobalMappingCreateIS(tis,&l2g)); 2693 PetscCall(ISDestroy(&tis)); 2694 } 2695 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs)); 2696 PetscCall(ISGetLocalSize(t_zerodiag_subs,&nl)); 2697 if (nl) { 2698 PetscBool valid = PETSC_TRUE; 2699 2700 if (checkb) { 2701 PetscCall(VecSet(matis->x,0)); 2702 PetscCall(ISGetLocalSize(subs[i],&nl)); 2703 PetscCall(ISGetIndices(subs[i],&idxs)); 2704 PetscCall(VecGetArray(matis->x,&array)); 2705 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2706 PetscCall(VecRestoreArray(matis->x,&array)); 2707 PetscCall(ISRestoreIndices(subs[i],&idxs)); 2708 PetscCall(VecPointwiseMult(matis->x,work[0],matis->x)); 2709 PetscCall(MatMult(matis->A,matis->x,matis->y)); 2710 PetscCall(VecPointwiseMult(matis->y,work[1],matis->y)); 2711 PetscCall(VecGetArray(matis->y,&array)); 2712 for (j=0;j<n_interior_dofs;j++) { 2713 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2714 valid = PETSC_FALSE; 2715 break; 2716 } 2717 } 2718 PetscCall(VecRestoreArray(matis->y,&array)); 2719 } 2720 if (valid && nneu) { 2721 const PetscInt *idxs; 2722 PetscInt nzb; 2723 2724 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2725 PetscCall(ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL)); 2726 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2727 if (nzb) valid = PETSC_FALSE; 2728 } 2729 if (valid && pressures) { 2730 IS t_pressure_subs,tmp; 2731 PetscInt i1,i2; 2732 2733 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs)); 2734 PetscCall(ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp)); 2735 PetscCall(ISGetLocalSize(tmp,&i1)); 2736 PetscCall(ISGetLocalSize(t_zerodiag_subs,&i2)); 2737 if (i2 != i1) valid = PETSC_FALSE; 2738 PetscCall(ISDestroy(&t_pressure_subs)); 2739 PetscCall(ISDestroy(&tmp)); 2740 } 2741 if (valid) { 2742 PetscCall(ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n])); 2743 benign_n++; 2744 } else recompute_zerodiag = PETSC_TRUE; 2745 } 2746 PetscCall(ISDestroy(&t_zerodiag_subs)); 2747 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2748 } 2749 } 2750 } else { /* there's just one subdomain (or zero if they have not been detected */ 2751 PetscBool valid = PETSC_TRUE; 2752 2753 if (nneu) valid = PETSC_FALSE; 2754 if (valid && pressures) { 2755 PetscCall(ISEqual(pressures,zerodiag,&valid)); 2756 } 2757 if (valid && checkb) { 2758 PetscCall(MatMult(matis->A,work[0],matis->x)); 2759 PetscCall(VecPointwiseMult(matis->x,work[1],matis->x)); 2760 PetscCall(VecGetArray(matis->x,&array)); 2761 for (j=0;j<n_interior_dofs;j++) { 2762 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2763 valid = PETSC_FALSE; 2764 break; 2765 } 2766 } 2767 PetscCall(VecRestoreArray(matis->x,&array)); 2768 } 2769 if (valid) { 2770 benign_n = 1; 2771 PetscCall(PetscMalloc1(benign_n,&zerodiag_subs)); 2772 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2773 zerodiag_subs[0] = zerodiag; 2774 } 2775 } 2776 if (checkb) { 2777 PetscCall(VecDestroyVecs(2,&work)); 2778 } 2779 } 2780 PetscCall(PetscFree(interior_dofs)); 2781 2782 if (!benign_n) { 2783 PetscInt n; 2784 2785 PetscCall(ISDestroy(&zerodiag)); 2786 recompute_zerodiag = PETSC_FALSE; 2787 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2788 if (n) have_null = PETSC_FALSE; 2789 } 2790 2791 /* final check for null pressures */ 2792 if (zerodiag && pressures) { 2793 PetscCall(ISEqual(pressures,zerodiag,&have_null)); 2794 } 2795 2796 if (recompute_zerodiag) { 2797 PetscCall(ISDestroy(&zerodiag)); 2798 if (benign_n == 1) { 2799 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2800 zerodiag = zerodiag_subs[0]; 2801 } else { 2802 PetscInt i,nzn,*new_idxs; 2803 2804 nzn = 0; 2805 for (i=0;i<benign_n;i++) { 2806 PetscInt ns; 2807 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2808 nzn += ns; 2809 } 2810 PetscCall(PetscMalloc1(nzn,&new_idxs)); 2811 nzn = 0; 2812 for (i=0;i<benign_n;i++) { 2813 PetscInt ns,*idxs; 2814 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2815 PetscCall(ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2816 PetscCall(PetscArraycpy(new_idxs+nzn,idxs,ns)); 2817 PetscCall(ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2818 nzn += ns; 2819 } 2820 PetscCall(PetscSortInt(nzn,new_idxs)); 2821 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag)); 2822 } 2823 have_null = PETSC_FALSE; 2824 } 2825 2826 /* determines if the coarse solver will be singular or not */ 2827 PetscCall(MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 2828 2829 /* Prepare matrix to compute no-net-flux */ 2830 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2831 Mat A,loc_divudotp; 2832 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2833 IS row,col,isused = NULL; 2834 PetscInt M,N,n,st,n_isused; 2835 2836 if (pressures) { 2837 isused = pressures; 2838 } else { 2839 isused = zerodiag_save; 2840 } 2841 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL)); 2842 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2843 PetscCall(MatGetLocalSize(A,&n,NULL)); 2844 PetscCheck(isused || (n == 0),PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2845 n_isused = 0; 2846 if (isused) { 2847 PetscCall(ISGetLocalSize(isused,&n_isused)); 2848 } 2849 PetscCallMPI(MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 2850 st = st-n_isused; 2851 if (n) { 2852 const PetscInt *gidxs; 2853 2854 PetscCall(MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp)); 2855 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 2856 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2857 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2858 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col)); 2859 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 2860 } else { 2861 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp)); 2862 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2863 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col)); 2864 } 2865 PetscCall(MatGetSize(pc->pmat,NULL,&N)); 2866 PetscCall(ISGetSize(row,&M)); 2867 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 2868 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 2869 PetscCall(ISDestroy(&row)); 2870 PetscCall(ISDestroy(&col)); 2871 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp)); 2872 PetscCall(MatSetType(pcbddc->divudotp,MATIS)); 2873 PetscCall(MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N)); 2874 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g)); 2875 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2876 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2877 PetscCall(MatISSetLocalMat(pcbddc->divudotp,loc_divudotp)); 2878 PetscCall(MatDestroy(&loc_divudotp)); 2879 PetscCall(MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2880 PetscCall(MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2881 } 2882 PetscCall(ISDestroy(&zerodiag_save)); 2883 PetscCall(ISDestroy(&pressures)); 2884 if (bzerodiag) { 2885 PetscInt i; 2886 2887 for (i=0;i<bsp;i++) { 2888 PetscCall(ISDestroy(&bzerodiag[i])); 2889 } 2890 PetscCall(PetscFree(bzerodiag)); 2891 } 2892 pcbddc->benign_n = benign_n; 2893 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2894 2895 /* determines if the problem has subdomains with 0 pressure block */ 2896 have_null = (PetscBool)(!!pcbddc->benign_n); 2897 PetscCall(MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 2898 2899 project_b0: 2900 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2901 /* change of basis and p0 dofs */ 2902 if (pcbddc->benign_n) { 2903 PetscInt i,s,*nnz; 2904 2905 /* local change of basis for pressures */ 2906 PetscCall(MatDestroy(&pcbddc->benign_change)); 2907 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change)); 2908 PetscCall(MatSetType(pcbddc->benign_change,MATAIJ)); 2909 PetscCall(MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE)); 2910 PetscCall(PetscMalloc1(n,&nnz)); 2911 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2912 for (i=0;i<pcbddc->benign_n;i++) { 2913 const PetscInt *idxs; 2914 PetscInt nzs,j; 2915 2916 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs)); 2917 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2918 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2919 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2920 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2921 } 2922 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz)); 2923 PetscCall(MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 2924 PetscCall(PetscFree(nnz)); 2925 /* set identity by default */ 2926 for (i=0;i<n;i++) { 2927 PetscCall(MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES)); 2928 } 2929 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 2930 PetscCall(PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0)); 2931 /* set change on pressures */ 2932 for (s=0;s<pcbddc->benign_n;s++) { 2933 PetscScalar *array; 2934 const PetscInt *idxs; 2935 PetscInt nzs; 2936 2937 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs)); 2938 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2939 for (i=0;i<nzs-1;i++) { 2940 PetscScalar vals[2]; 2941 PetscInt cols[2]; 2942 2943 cols[0] = idxs[i]; 2944 cols[1] = idxs[nzs-1]; 2945 vals[0] = 1.; 2946 vals[1] = 1.; 2947 PetscCall(MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES)); 2948 } 2949 PetscCall(PetscMalloc1(nzs,&array)); 2950 for (i=0;i<nzs-1;i++) array[i] = -1.; 2951 array[nzs-1] = 1.; 2952 PetscCall(MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES)); 2953 /* store local idxs for p0 */ 2954 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2955 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2956 PetscCall(PetscFree(array)); 2957 } 2958 PetscCall(MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2959 PetscCall(MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2960 2961 /* project if needed */ 2962 if (pcbddc->benign_change_explicit) { 2963 Mat M; 2964 2965 PetscCall(MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M)); 2966 PetscCall(MatDestroy(&pcbddc->local_mat)); 2967 PetscCall(MatSeqAIJCompress(M,&pcbddc->local_mat)); 2968 PetscCall(MatDestroy(&M)); 2969 } 2970 /* store global idxs for p0 */ 2971 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx)); 2972 } 2973 *zerodiaglocal = zerodiag; 2974 PetscFunctionReturn(0); 2975 } 2976 2977 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2978 { 2979 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2980 PetscScalar *array; 2981 2982 PetscFunctionBegin; 2983 if (!pcbddc->benign_sf) { 2984 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf)); 2985 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx)); 2986 } 2987 if (get) { 2988 PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array)); 2989 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 2990 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 2991 PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 2992 } else { 2993 PetscCall(VecGetArray(v,&array)); 2994 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 2995 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 2996 PetscCall(VecRestoreArray(v,&array)); 2997 } 2998 PetscFunctionReturn(0); 2999 } 3000 3001 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3002 { 3003 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3004 3005 PetscFunctionBegin; 3006 /* TODO: add error checking 3007 - avoid nested pop (or push) calls. 3008 - cannot push before pop. 3009 - cannot call this if pcbddc->local_mat is NULL 3010 */ 3011 if (!pcbddc->benign_n) { 3012 PetscFunctionReturn(0); 3013 } 3014 if (pop) { 3015 if (pcbddc->benign_change_explicit) { 3016 IS is_p0; 3017 MatReuse reuse; 3018 3019 /* extract B_0 */ 3020 reuse = MAT_INITIAL_MATRIX; 3021 if (pcbddc->benign_B0) { 3022 reuse = MAT_REUSE_MATRIX; 3023 } 3024 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0)); 3025 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0)); 3026 /* remove rows and cols from local problem */ 3027 PetscCall(MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE)); 3028 PetscCall(MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 3029 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL)); 3030 PetscCall(ISDestroy(&is_p0)); 3031 } else { 3032 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3033 PetscScalar *vals; 3034 PetscInt i,n,*idxs_ins; 3035 3036 PetscCall(VecGetLocalSize(matis->y,&n)); 3037 PetscCall(PetscMalloc2(n,&idxs_ins,n,&vals)); 3038 if (!pcbddc->benign_B0) { 3039 PetscInt *nnz; 3040 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0)); 3041 PetscCall(MatSetType(pcbddc->benign_B0,MATAIJ)); 3042 PetscCall(MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE)); 3043 PetscCall(PetscMalloc1(pcbddc->benign_n,&nnz)); 3044 for (i=0;i<pcbddc->benign_n;i++) { 3045 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i])); 3046 nnz[i] = n - nnz[i]; 3047 } 3048 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz)); 3049 PetscCall(MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 3050 PetscCall(PetscFree(nnz)); 3051 } 3052 3053 for (i=0;i<pcbddc->benign_n;i++) { 3054 PetscScalar *array; 3055 PetscInt *idxs,j,nz,cum; 3056 3057 PetscCall(VecSet(matis->x,0.)); 3058 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz)); 3059 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3060 for (j=0;j<nz;j++) vals[j] = 1.; 3061 PetscCall(VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES)); 3062 PetscCall(VecAssemblyBegin(matis->x)); 3063 PetscCall(VecAssemblyEnd(matis->x)); 3064 PetscCall(VecSet(matis->y,0.)); 3065 PetscCall(MatMult(matis->A,matis->x,matis->y)); 3066 PetscCall(VecGetArray(matis->y,&array)); 3067 cum = 0; 3068 for (j=0;j<n;j++) { 3069 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3070 vals[cum] = array[j]; 3071 idxs_ins[cum] = j; 3072 cum++; 3073 } 3074 } 3075 PetscCall(MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES)); 3076 PetscCall(VecRestoreArray(matis->y,&array)); 3077 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3078 } 3079 PetscCall(MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3080 PetscCall(MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3081 PetscCall(PetscFree2(idxs_ins,vals)); 3082 } 3083 } else { /* push */ 3084 if (pcbddc->benign_change_explicit) { 3085 PetscInt i; 3086 3087 for (i=0;i<pcbddc->benign_n;i++) { 3088 PetscScalar *B0_vals; 3089 PetscInt *B0_cols,B0_ncol; 3090 3091 PetscCall(MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3092 PetscCall(MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES)); 3093 PetscCall(MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES)); 3094 PetscCall(MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES)); 3095 PetscCall(MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3096 } 3097 PetscCall(MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3098 PetscCall(MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3099 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3100 } 3101 PetscFunctionReturn(0); 3102 } 3103 3104 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3105 { 3106 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3107 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3108 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3109 PetscBLASInt *B_iwork,*B_ifail; 3110 PetscScalar *work,lwork; 3111 PetscScalar *St,*S,*eigv; 3112 PetscScalar *Sarray,*Starray; 3113 PetscReal *eigs,thresh,lthresh,uthresh; 3114 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3115 PetscBool allocated_S_St,upart; 3116 #if defined(PETSC_USE_COMPLEX) 3117 PetscReal *rwork; 3118 #endif 3119 3120 PetscFunctionBegin; 3121 if (!pcbddc->adaptive_selection) PetscFunctionReturn(0); 3122 PetscCheck(sub_schurs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3123 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3124 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3125 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3126 3127 if (pcbddc->dbg_flag) { 3128 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3129 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3130 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 3131 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n")); 3132 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3133 } 3134 3135 if (pcbddc->dbg_flag) { 3136 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef)); 3137 } 3138 3139 /* max size of subsets */ 3140 mss = 0; 3141 for (i=0;i<sub_schurs->n_subs;i++) { 3142 PetscInt subset_size; 3143 3144 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3145 mss = PetscMax(mss,subset_size); 3146 } 3147 3148 /* min/max and threshold */ 3149 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3150 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3151 nmax = PetscMax(nmin,nmax); 3152 allocated_S_St = PETSC_FALSE; 3153 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3154 allocated_S_St = PETSC_TRUE; 3155 } 3156 3157 /* allocate lapack workspace */ 3158 cum = cum2 = 0; 3159 maxneigs = 0; 3160 for (i=0;i<sub_schurs->n_subs;i++) { 3161 PetscInt n,subset_size; 3162 3163 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3164 n = PetscMin(subset_size,nmax); 3165 cum += subset_size; 3166 cum2 += subset_size*n; 3167 maxneigs = PetscMax(maxneigs,n); 3168 } 3169 lwork = 0; 3170 if (mss) { 3171 if (sub_schurs->is_symmetric) { 3172 PetscScalar sdummy = 0.; 3173 PetscBLASInt B_itype = 1; 3174 PetscBLASInt B_N = mss, idummy = 0; 3175 PetscReal rdummy = 0.,zero = 0.0; 3176 PetscReal eps = 0.0; /* dlamch? */ 3177 3178 B_lwork = -1; 3179 /* some implementations may complain about NULL pointers, even if we are querying */ 3180 S = &sdummy; 3181 St = &sdummy; 3182 eigs = &rdummy; 3183 eigv = &sdummy; 3184 B_iwork = &idummy; 3185 B_ifail = &idummy; 3186 #if defined(PETSC_USE_COMPLEX) 3187 rwork = &rdummy; 3188 #endif 3189 thresh = 1.0; 3190 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3191 #if defined(PETSC_USE_COMPLEX) 3192 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3193 #else 3194 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3195 #endif 3196 PetscCheck(B_ierr == 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3197 PetscCall(PetscFPTrapPop()); 3198 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3199 } 3200 3201 nv = 0; 3202 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3203 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&nv)); 3204 } 3205 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork)); 3206 if (allocated_S_St) { 3207 PetscCall(PetscMalloc2(mss*mss,&S,mss*mss,&St)); 3208 } 3209 PetscCall(PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail)); 3210 #if defined(PETSC_USE_COMPLEX) 3211 PetscCall(PetscMalloc1(7*mss,&rwork)); 3212 #endif 3213 PetscCall(PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3214 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3215 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3216 nv+cum,&pcbddc->adaptive_constraints_idxs, 3217 nv+cum2,&pcbddc->adaptive_constraints_data)); 3218 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs)); 3219 3220 maxneigs = 0; 3221 cum = cumarray = 0; 3222 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3223 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3224 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3225 const PetscInt *idxs; 3226 3227 PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs)); 3228 for (cum=0;cum<nv;cum++) { 3229 pcbddc->adaptive_constraints_n[cum] = 1; 3230 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3231 pcbddc->adaptive_constraints_data[cum] = 1.0; 3232 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3233 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3234 } 3235 PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs)); 3236 } 3237 3238 if (mss) { /* multilevel */ 3239 if (sub_schurs->gdsw) { 3240 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray)); 3241 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3242 } else { 3243 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3244 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3245 } 3246 } 3247 3248 lthresh = pcbddc->adaptive_threshold[0]; 3249 uthresh = pcbddc->adaptive_threshold[1]; 3250 upart = pcbddc->use_deluxe_scaling; 3251 for (i=0;i<sub_schurs->n_subs;i++) { 3252 const PetscInt *idxs; 3253 PetscReal upper,lower; 3254 PetscInt j,subset_size,eigs_start = 0; 3255 PetscBLASInt B_N; 3256 PetscBool same_data = PETSC_FALSE; 3257 PetscBool scal = PETSC_FALSE; 3258 3259 if (upart) { 3260 upper = PETSC_MAX_REAL; 3261 lower = uthresh; 3262 } else { 3263 if (sub_schurs->gdsw) { 3264 upper = uthresh; 3265 lower = PETSC_MIN_REAL; 3266 } else { 3267 PetscCheck(sub_schurs->is_posdef,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3268 upper = 1./uthresh; 3269 lower = 0.; 3270 } 3271 } 3272 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3273 PetscCall(ISGetIndices(sub_schurs->is_subs[i],&idxs)); 3274 PetscCall(PetscBLASIntCast(subset_size,&B_N)); 3275 /* this is experimental: we assume the dofs have been properly grouped to have 3276 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3277 if (!sub_schurs->is_posdef) { 3278 Mat T; 3279 3280 for (j=0;j<subset_size;j++) { 3281 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3282 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T)); 3283 PetscCall(MatScale(T,-1.0)); 3284 PetscCall(MatDestroy(&T)); 3285 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T)); 3286 PetscCall(MatScale(T,-1.0)); 3287 PetscCall(MatDestroy(&T)); 3288 if (sub_schurs->change_primal_sub) { 3289 PetscInt nz,k; 3290 const PetscInt *idxs; 3291 3292 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz)); 3293 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i],&idxs)); 3294 for (k=0;k<nz;k++) { 3295 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3296 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3297 } 3298 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs)); 3299 } 3300 scal = PETSC_TRUE; 3301 break; 3302 } 3303 } 3304 } 3305 3306 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3307 if (sub_schurs->is_symmetric) { 3308 PetscInt j,k; 3309 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3310 PetscCall(PetscArrayzero(S,subset_size*subset_size)); 3311 PetscCall(PetscArrayzero(St,subset_size*subset_size)); 3312 } 3313 for (j=0;j<subset_size;j++) { 3314 for (k=j;k<subset_size;k++) { 3315 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3316 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3317 } 3318 } 3319 } else { 3320 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3321 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3322 } 3323 } else { 3324 S = Sarray + cumarray; 3325 St = Starray + cumarray; 3326 } 3327 /* see if we can save some work */ 3328 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3329 PetscCall(PetscArraycmp(S,St,subset_size*subset_size,&same_data)); 3330 } 3331 3332 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3333 B_neigs = 0; 3334 } else { 3335 if (sub_schurs->is_symmetric) { 3336 PetscBLASInt B_itype = 1; 3337 PetscBLASInt B_IL, B_IU; 3338 PetscReal eps = -1.0; /* dlamch? */ 3339 PetscInt nmin_s; 3340 PetscBool compute_range; 3341 3342 B_neigs = 0; 3343 compute_range = (PetscBool)!same_data; 3344 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3345 3346 if (pcbddc->dbg_flag) { 3347 PetscInt nc = 0; 3348 3349 if (sub_schurs->change_primal_sub) { 3350 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc)); 3351 } 3352 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc)); 3353 } 3354 3355 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3356 if (compute_range) { 3357 3358 /* ask for eigenvalues larger than thresh */ 3359 if (sub_schurs->is_posdef) { 3360 #if defined(PETSC_USE_COMPLEX) 3361 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3362 #else 3363 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3364 #endif 3365 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3366 } else { /* no theory so far, but it works nicely */ 3367 PetscInt recipe = 0,recipe_m = 1; 3368 PetscReal bb[2]; 3369 3370 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL)); 3371 switch (recipe) { 3372 case 0: 3373 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3374 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3375 #if defined(PETSC_USE_COMPLEX) 3376 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3377 #else 3378 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3379 #endif 3380 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3381 break; 3382 case 1: 3383 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3384 #if defined(PETSC_USE_COMPLEX) 3385 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3386 #else 3387 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3388 #endif 3389 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3390 if (!scal) { 3391 PetscBLASInt B_neigs2 = 0; 3392 3393 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3394 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3395 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3396 #if defined(PETSC_USE_COMPLEX) 3397 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3398 #else 3399 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3400 #endif 3401 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3402 B_neigs += B_neigs2; 3403 } 3404 break; 3405 case 2: 3406 if (scal) { 3407 bb[0] = PETSC_MIN_REAL; 3408 bb[1] = 0; 3409 #if defined(PETSC_USE_COMPLEX) 3410 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3411 #else 3412 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3413 #endif 3414 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3415 } else { 3416 PetscBLASInt B_neigs2 = 0; 3417 PetscBool import = PETSC_FALSE; 3418 3419 lthresh = PetscMax(lthresh,0.0); 3420 if (lthresh > 0.0) { 3421 bb[0] = PETSC_MIN_REAL; 3422 bb[1] = lthresh*lthresh; 3423 3424 import = PETSC_TRUE; 3425 #if defined(PETSC_USE_COMPLEX) 3426 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3427 #else 3428 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3429 #endif 3430 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3431 } 3432 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3433 bb[1] = PETSC_MAX_REAL; 3434 if (import) { 3435 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3436 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3437 } 3438 #if defined(PETSC_USE_COMPLEX) 3439 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3440 #else 3441 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3442 #endif 3443 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3444 B_neigs += B_neigs2; 3445 } 3446 break; 3447 case 3: 3448 if (scal) { 3449 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL)); 3450 } else { 3451 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL)); 3452 } 3453 if (!scal) { 3454 bb[0] = uthresh; 3455 bb[1] = PETSC_MAX_REAL; 3456 #if defined(PETSC_USE_COMPLEX) 3457 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3458 #else 3459 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3460 #endif 3461 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3462 } 3463 if (recipe_m > 0 && B_N - B_neigs > 0) { 3464 PetscBLASInt B_neigs2 = 0; 3465 3466 B_IL = 1; 3467 PetscCall(PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU)); 3468 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3469 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3470 #if defined(PETSC_USE_COMPLEX) 3471 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3472 #else 3473 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3474 #endif 3475 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3476 B_neigs += B_neigs2; 3477 } 3478 break; 3479 case 4: 3480 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3481 #if defined(PETSC_USE_COMPLEX) 3482 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3483 #else 3484 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3485 #endif 3486 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3487 { 3488 PetscBLASInt B_neigs2 = 0; 3489 3490 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3491 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3492 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3493 #if defined(PETSC_USE_COMPLEX) 3494 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3495 #else 3496 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3497 #endif 3498 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3499 B_neigs += B_neigs2; 3500 } 3501 break; 3502 case 5: /* same as before: first compute all eigenvalues, then filter */ 3503 #if defined(PETSC_USE_COMPLEX) 3504 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3505 #else 3506 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3507 #endif 3508 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3509 { 3510 PetscInt e,k,ne; 3511 for (e=0,ne=0;e<B_neigs;e++) { 3512 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3513 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3514 eigs[ne] = eigs[e]; 3515 ne++; 3516 } 3517 } 3518 PetscCall(PetscArraycpy(eigv,S,B_N*ne)); 3519 B_neigs = ne; 3520 } 3521 break; 3522 default: 3523 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %" PetscInt_FMT,recipe); 3524 } 3525 } 3526 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3527 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3528 B_IL = 1; 3529 #if defined(PETSC_USE_COMPLEX) 3530 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3531 #else 3532 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3533 #endif 3534 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3535 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3536 PetscInt k; 3537 PetscCheck(sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3538 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax)); 3539 PetscCall(PetscBLASIntCast(nmax,&B_neigs)); 3540 nmin = nmax; 3541 PetscCall(PetscArrayzero(eigv,subset_size*nmax)); 3542 for (k=0;k<nmax;k++) { 3543 eigs[k] = 1./PETSC_SMALL; 3544 eigv[k*(subset_size+1)] = 1.0; 3545 } 3546 } 3547 PetscCall(PetscFPTrapPop()); 3548 if (B_ierr) { 3549 PetscCheck(B_ierr >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT,-B_ierr); 3550 PetscCheck(B_ierr > B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge",B_ierr); 3551 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite",B_ierr-B_N-1); 3552 } 3553 3554 if (B_neigs > nmax) { 3555 if (pcbddc->dbg_flag) { 3556 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n",B_neigs,nmax)); 3557 } 3558 if (upart) eigs_start = scal ? 0 : B_neigs-nmax; 3559 B_neigs = nmax; 3560 } 3561 3562 nmin_s = PetscMin(nmin,B_N); 3563 if (B_neigs < nmin_s) { 3564 PetscBLASInt B_neigs2 = 0; 3565 3566 if (upart) { 3567 if (scal) { 3568 B_IU = nmin_s; 3569 B_IL = B_neigs + 1; 3570 } else { 3571 B_IL = B_N - nmin_s + 1; 3572 B_IU = B_N - B_neigs; 3573 } 3574 } else { 3575 B_IL = B_neigs + 1; 3576 B_IU = nmin_s; 3577 } 3578 if (pcbddc->dbg_flag) { 3579 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU)); 3580 } 3581 if (sub_schurs->is_symmetric) { 3582 PetscInt j,k; 3583 for (j=0;j<subset_size;j++) { 3584 for (k=j;k<subset_size;k++) { 3585 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3586 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3587 } 3588 } 3589 } else { 3590 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3591 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3592 } 3593 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3594 #if defined(PETSC_USE_COMPLEX) 3595 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3596 #else 3597 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3598 #endif 3599 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3600 PetscCall(PetscFPTrapPop()); 3601 B_neigs += B_neigs2; 3602 } 3603 if (B_ierr) { 3604 PetscCheck(B_ierr >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT,-B_ierr); 3605 PetscCheck(B_ierr > B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge",B_ierr); 3606 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite",B_ierr-B_N-1); 3607 } 3608 if (pcbddc->dbg_flag) { 3609 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %" PetscBLASInt_FMT " eigs\n",B_neigs)); 3610 for (j=0;j<B_neigs;j++) { 3611 if (!sub_schurs->gdsw) { 3612 if (eigs[j] == 0.0) { 3613 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n")); 3614 } else { 3615 if (upart) { 3616 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",(double)eigs[j+eigs_start])); 3617 } else { 3618 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",(double)(1./eigs[j+eigs_start]))); 3619 } 3620 } 3621 } else { 3622 double pg = (double)eigs[j+eigs_start]; 3623 if (pg < 2*PETSC_SMALL) pg = 0.0; 3624 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",pg)); 3625 } 3626 } 3627 } 3628 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3629 } 3630 /* change the basis back to the original one */ 3631 if (sub_schurs->change) { 3632 Mat change,phi,phit; 3633 3634 if (pcbddc->dbg_flag > 2) { 3635 PetscInt ii; 3636 for (ii=0;ii<B_neigs;ii++) { 3637 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n",ii,B_neigs,B_N)); 3638 for (j=0;j<B_N;j++) { 3639 #if defined(PETSC_USE_COMPLEX) 3640 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3641 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3642 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",(double)r,(double)c)); 3643 #else 3644 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",(double)(eigv[(ii+eigs_start)*subset_size+j]))); 3645 #endif 3646 } 3647 } 3648 } 3649 PetscCall(KSPGetOperators(sub_schurs->change[i],&change,NULL)); 3650 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit)); 3651 PetscCall(MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi)); 3652 PetscCall(MatCopy(phi,phit,SAME_NONZERO_PATTERN)); 3653 PetscCall(MatDestroy(&phit)); 3654 PetscCall(MatDestroy(&phi)); 3655 } 3656 maxneigs = PetscMax(B_neigs,maxneigs); 3657 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3658 if (B_neigs) { 3659 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size)); 3660 3661 if (pcbddc->dbg_flag > 1) { 3662 PetscInt ii; 3663 for (ii=0;ii<B_neigs;ii++) { 3664 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n",ii,B_neigs,B_N)); 3665 for (j=0;j<B_N;j++) { 3666 #if defined(PETSC_USE_COMPLEX) 3667 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3668 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3669 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",(double)r,(double)c)); 3670 #else 3671 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",(double)PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]))); 3672 #endif 3673 } 3674 } 3675 } 3676 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size)); 3677 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3678 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3679 cum++; 3680 } 3681 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i],&idxs)); 3682 /* shift for next computation */ 3683 cumarray += subset_size*subset_size; 3684 } 3685 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3686 3687 if (mss) { 3688 if (sub_schurs->gdsw) { 3689 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray)); 3690 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3691 } else { 3692 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3693 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3694 /* destroy matrices (junk) */ 3695 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3696 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3697 } 3698 } 3699 if (allocated_S_St) PetscCall(PetscFree2(S,St)); 3700 PetscCall(PetscFree5(eigv,eigs,work,B_iwork,B_ifail)); 3701 #if defined(PETSC_USE_COMPLEX) 3702 PetscCall(PetscFree(rwork)); 3703 #endif 3704 if (pcbddc->dbg_flag) { 3705 PetscInt maxneigs_r; 3706 PetscCall(MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc))); 3707 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %" PetscInt_FMT "\n",maxneigs_r)); 3708 } 3709 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3710 PetscFunctionReturn(0); 3711 } 3712 3713 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3714 { 3715 PetscScalar *coarse_submat_vals; 3716 3717 PetscFunctionBegin; 3718 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3719 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3720 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3721 3722 /* Setup local neumann solver ksp_R */ 3723 /* PCBDDCSetUpLocalScatters should be called first! */ 3724 PetscCall(PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE)); 3725 3726 /* 3727 Setup local correction and local part of coarse basis. 3728 Gives back the dense local part of the coarse matrix in column major ordering 3729 */ 3730 PetscCall(PCBDDCSetUpCorrection(pc,&coarse_submat_vals)); 3731 3732 /* Compute total number of coarse nodes and setup coarse solver */ 3733 PetscCall(PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals)); 3734 3735 /* free */ 3736 PetscCall(PetscFree(coarse_submat_vals)); 3737 PetscFunctionReturn(0); 3738 } 3739 3740 PetscErrorCode PCBDDCResetCustomization(PC pc) 3741 { 3742 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3743 3744 PetscFunctionBegin; 3745 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3746 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3747 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3748 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3749 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3750 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3751 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3752 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3753 PetscCall(PCBDDCSetDofsSplitting(pc,0,NULL)); 3754 PetscCall(PCBDDCSetDofsSplittingLocal(pc,0,NULL)); 3755 PetscFunctionReturn(0); 3756 } 3757 3758 PetscErrorCode PCBDDCResetTopography(PC pc) 3759 { 3760 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3761 PetscInt i; 3762 3763 PetscFunctionBegin; 3764 PetscCall(MatDestroy(&pcbddc->nedcG)); 3765 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3766 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3767 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3768 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3769 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3770 PetscCall(VecDestroy(&pcbddc->work_change)); 3771 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3772 PetscCall(MatDestroy(&pcbddc->divudotp)); 3773 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3774 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3775 for (i=0;i<pcbddc->n_local_subs;i++) { 3776 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3777 } 3778 pcbddc->n_local_subs = 0; 3779 PetscCall(PetscFree(pcbddc->local_subs)); 3780 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3781 pcbddc->graphanalyzed = PETSC_FALSE; 3782 pcbddc->recompute_topography = PETSC_TRUE; 3783 pcbddc->corner_selected = PETSC_FALSE; 3784 PetscFunctionReturn(0); 3785 } 3786 3787 PetscErrorCode PCBDDCResetSolvers(PC pc) 3788 { 3789 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3790 3791 PetscFunctionBegin; 3792 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3793 if (pcbddc->coarse_phi_B) { 3794 PetscScalar *array; 3795 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&array)); 3796 PetscCall(PetscFree(array)); 3797 } 3798 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3799 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3800 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3801 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3802 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3803 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3804 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3805 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3806 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3807 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3808 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3809 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3810 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3811 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3812 PetscCall(KSPReset(pcbddc->ksp_D)); 3813 PetscCall(KSPReset(pcbddc->ksp_R)); 3814 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3815 PetscCall(MatDestroy(&pcbddc->local_mat)); 3816 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3817 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 3818 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3819 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3820 PetscCall(MatDestroy(&pcbddc->benign_change)); 3821 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3822 PetscCall(PCBDDCBenignShellMat(pc,PETSC_TRUE)); 3823 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3824 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3825 if (pcbddc->benign_zerodiag_subs) { 3826 PetscInt i; 3827 for (i=0;i<pcbddc->benign_n;i++) { 3828 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3829 } 3830 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3831 } 3832 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 3833 PetscFunctionReturn(0); 3834 } 3835 3836 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3837 { 3838 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3839 PC_IS *pcis = (PC_IS*)pc->data; 3840 VecType impVecType; 3841 PetscInt n_constraints,n_R,old_size; 3842 3843 PetscFunctionBegin; 3844 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3845 n_R = pcis->n - pcbddc->n_vertices; 3846 PetscCall(VecGetType(pcis->vec1_N,&impVecType)); 3847 /* local work vectors (try to avoid unneeded work)*/ 3848 /* R nodes */ 3849 old_size = -1; 3850 if (pcbddc->vec1_R) { 3851 PetscCall(VecGetSize(pcbddc->vec1_R,&old_size)); 3852 } 3853 if (n_R != old_size) { 3854 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3855 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3856 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R)); 3857 PetscCall(VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R)); 3858 PetscCall(VecSetType(pcbddc->vec1_R,impVecType)); 3859 PetscCall(VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R)); 3860 } 3861 /* local primal dofs */ 3862 old_size = -1; 3863 if (pcbddc->vec1_P) { 3864 PetscCall(VecGetSize(pcbddc->vec1_P,&old_size)); 3865 } 3866 if (pcbddc->local_primal_size != old_size) { 3867 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3868 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P)); 3869 PetscCall(VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size)); 3870 PetscCall(VecSetType(pcbddc->vec1_P,impVecType)); 3871 } 3872 /* local explicit constraints */ 3873 old_size = -1; 3874 if (pcbddc->vec1_C) { 3875 PetscCall(VecGetSize(pcbddc->vec1_C,&old_size)); 3876 } 3877 if (n_constraints && n_constraints != old_size) { 3878 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3879 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C)); 3880 PetscCall(VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints)); 3881 PetscCall(VecSetType(pcbddc->vec1_C,impVecType)); 3882 } 3883 PetscFunctionReturn(0); 3884 } 3885 3886 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3887 { 3888 /* pointers to pcis and pcbddc */ 3889 PC_IS* pcis = (PC_IS*)pc->data; 3890 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3891 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3892 /* submatrices of local problem */ 3893 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3894 /* submatrices of local coarse problem */ 3895 Mat S_VV,S_CV,S_VC,S_CC; 3896 /* working matrices */ 3897 Mat C_CR; 3898 /* additional working stuff */ 3899 PC pc_R; 3900 Mat F,Brhs = NULL; 3901 Vec dummy_vec; 3902 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3903 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3904 PetscScalar *work; 3905 PetscInt *idx_V_B; 3906 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3907 PetscInt i,n_R,n_D,n_B; 3908 PetscScalar one=1.0,m_one=-1.0; 3909 3910 PetscFunctionBegin; 3911 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3912 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 3913 3914 /* Set Non-overlapping dimensions */ 3915 n_vertices = pcbddc->n_vertices; 3916 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3917 n_B = pcis->n_B; 3918 n_D = pcis->n - n_B; 3919 n_R = pcis->n - n_vertices; 3920 3921 /* vertices in boundary numbering */ 3922 PetscCall(PetscMalloc1(n_vertices,&idx_V_B)); 3923 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B)); 3924 PetscCheck(i == n_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT,n_vertices,i); 3925 3926 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3927 PetscCall(PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals)); 3928 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV)); 3929 PetscCall(MatDenseSetLDA(S_VV,pcbddc->local_primal_size)); 3930 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV)); 3931 PetscCall(MatDenseSetLDA(S_CV,pcbddc->local_primal_size)); 3932 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC)); 3933 PetscCall(MatDenseSetLDA(S_VC,pcbddc->local_primal_size)); 3934 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC)); 3935 PetscCall(MatDenseSetLDA(S_CC,pcbddc->local_primal_size)); 3936 3937 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3938 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_R)); 3939 PetscCall(PCSetUp(pc_R)); 3940 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU)); 3941 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL)); 3942 lda_rhs = n_R; 3943 need_benign_correction = PETSC_FALSE; 3944 if (isLU || isCHOL) { 3945 PetscCall(PCFactorGetMatrix(pc_R,&F)); 3946 } else if (sub_schurs && sub_schurs->reuse_solver) { 3947 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3948 MatFactorType type; 3949 3950 F = reuse_solver->F; 3951 PetscCall(MatGetFactorType(F,&type)); 3952 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3953 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3954 PetscCall(MatGetSize(F,&lda_rhs,NULL)); 3955 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3956 } else F = NULL; 3957 3958 /* determine if we can use a sparse right-hand side */ 3959 sparserhs = PETSC_FALSE; 3960 if (F) { 3961 MatSolverType solver; 3962 3963 PetscCall(MatFactorGetSolverType(F,&solver)); 3964 PetscCall(PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs)); 3965 } 3966 3967 /* allocate workspace */ 3968 n = 0; 3969 if (n_constraints) { 3970 n += lda_rhs*n_constraints; 3971 } 3972 if (n_vertices) { 3973 n = PetscMax(2*lda_rhs*n_vertices,n); 3974 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3975 } 3976 if (!pcbddc->symmetric_primal) { 3977 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3978 } 3979 PetscCall(PetscMalloc1(n,&work)); 3980 3981 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3982 dummy_vec = NULL; 3983 if (need_benign_correction && lda_rhs != n_R && F) { 3984 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec)); 3985 PetscCall(VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE)); 3986 PetscCall(VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name)); 3987 } 3988 3989 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3990 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3991 3992 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3993 if (n_constraints) { 3994 Mat M3,C_B; 3995 IS is_aux; 3996 3997 /* Extract constraints on R nodes: C_{CR} */ 3998 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux)); 3999 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR)); 4000 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 4001 4002 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4003 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4004 if (!sparserhs) { 4005 PetscCall(PetscArrayzero(work,lda_rhs*n_constraints)); 4006 for (i=0;i<n_constraints;i++) { 4007 const PetscScalar *row_cmat_values; 4008 const PetscInt *row_cmat_indices; 4009 PetscInt size_of_constraint,j; 4010 4011 PetscCall(MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 4012 for (j=0;j<size_of_constraint;j++) { 4013 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4014 } 4015 PetscCall(MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 4016 } 4017 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs)); 4018 } else { 4019 Mat tC_CR; 4020 4021 PetscCall(MatScale(C_CR,-1.0)); 4022 if (lda_rhs != n_R) { 4023 PetscScalar *aa; 4024 PetscInt r,*ii,*jj; 4025 PetscBool done; 4026 4027 PetscCall(MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4028 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4029 PetscCall(MatSeqAIJGetArray(C_CR,&aa)); 4030 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR)); 4031 PetscCall(MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4032 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4033 } else { 4034 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4035 tC_CR = C_CR; 4036 } 4037 PetscCall(MatCreateTranspose(tC_CR,&Brhs)); 4038 PetscCall(MatDestroy(&tC_CR)); 4039 } 4040 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R)); 4041 if (F) { 4042 if (need_benign_correction) { 4043 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4044 4045 /* rhs is already zero on interior dofs, no need to change the rhs */ 4046 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n)); 4047 } 4048 PetscCall(MatMatSolve(F,Brhs,local_auxmat2_R)); 4049 if (need_benign_correction) { 4050 PetscScalar *marr; 4051 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4052 4053 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4054 if (lda_rhs != n_R) { 4055 for (i=0;i<n_constraints;i++) { 4056 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4057 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4058 PetscCall(VecResetArray(dummy_vec)); 4059 } 4060 } else { 4061 for (i=0;i<n_constraints;i++) { 4062 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4063 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4064 PetscCall(VecResetArray(pcbddc->vec1_R)); 4065 } 4066 } 4067 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4068 } 4069 } else { 4070 PetscScalar *marr; 4071 4072 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4073 for (i=0;i<n_constraints;i++) { 4074 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4075 PetscCall(VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs)); 4076 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4077 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4078 PetscCall(VecResetArray(pcbddc->vec1_R)); 4079 PetscCall(VecResetArray(pcbddc->vec2_R)); 4080 } 4081 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4082 } 4083 if (sparserhs) PetscCall(MatScale(C_CR,-1.0)); 4084 PetscCall(MatDestroy(&Brhs)); 4085 if (!pcbddc->switch_static) { 4086 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2)); 4087 for (i=0;i<n_constraints;i++) { 4088 Vec r, b; 4089 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R,i,&r)); 4090 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2,i,&b)); 4091 PetscCall(VecScatterBegin(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4092 PetscCall(VecScatterEnd(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4093 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2,i,&b)); 4094 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R,i,&r)); 4095 } 4096 PetscCall(MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4097 } else { 4098 if (lda_rhs != n_R) { 4099 IS dummy; 4100 4101 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy)); 4102 PetscCall(MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2)); 4103 PetscCall(ISDestroy(&dummy)); 4104 } else { 4105 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4106 pcbddc->local_auxmat2 = local_auxmat2_R; 4107 } 4108 PetscCall(MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4109 } 4110 PetscCall(ISDestroy(&is_aux)); 4111 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4112 PetscCall(MatScale(M3,m_one)); 4113 if (isCHOL) { 4114 PetscCall(MatCholeskyFactor(M3,NULL,NULL)); 4115 } else { 4116 PetscCall(MatLUFactor(M3,NULL,NULL,NULL)); 4117 } 4118 PetscCall(MatSeqDenseInvertFactors_Private(M3)); 4119 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4120 PetscCall(MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1)); 4121 PetscCall(MatDestroy(&C_B)); 4122 PetscCall(MatCopy(M3,S_CC,SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4123 PetscCall(MatDestroy(&M3)); 4124 } 4125 4126 /* Get submatrices from subdomain matrix */ 4127 if (n_vertices) { 4128 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4129 PetscBool oldpin; 4130 #endif 4131 PetscBool isaij; 4132 IS is_aux; 4133 4134 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4135 IS tis; 4136 4137 PetscCall(ISDuplicate(pcbddc->is_R_local,&tis)); 4138 PetscCall(ISSort(tis)); 4139 PetscCall(ISComplement(tis,0,pcis->n,&is_aux)); 4140 PetscCall(ISDestroy(&tis)); 4141 } else { 4142 PetscCall(ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux)); 4143 } 4144 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4145 oldpin = pcbddc->local_mat->boundtocpu; 4146 #endif 4147 PetscCall(MatBindToCPU(pcbddc->local_mat,PETSC_TRUE)); 4148 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV)); 4149 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR)); 4150 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij)); 4151 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4152 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4153 } 4154 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV)); 4155 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4156 PetscCall(MatBindToCPU(pcbddc->local_mat,oldpin)); 4157 #endif 4158 PetscCall(ISDestroy(&is_aux)); 4159 } 4160 4161 /* Matrix of coarse basis functions (local) */ 4162 if (pcbddc->coarse_phi_B) { 4163 PetscInt on_B,on_primal,on_D=n_D; 4164 if (pcbddc->coarse_phi_D) { 4165 PetscCall(MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL)); 4166 } 4167 PetscCall(MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal)); 4168 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4169 PetscScalar *marray; 4170 4171 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&marray)); 4172 PetscCall(PetscFree(marray)); 4173 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4174 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4175 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4176 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4177 } 4178 } 4179 4180 if (!pcbddc->coarse_phi_B) { 4181 PetscScalar *marr; 4182 4183 /* memory size */ 4184 n = n_B*pcbddc->local_primal_size; 4185 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4186 if (!pcbddc->symmetric_primal) n *= 2; 4187 PetscCall(PetscCalloc1(n,&marr)); 4188 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B)); 4189 marr += n_B*pcbddc->local_primal_size; 4190 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4191 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D)); 4192 marr += n_D*pcbddc->local_primal_size; 4193 } 4194 if (!pcbddc->symmetric_primal) { 4195 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B)); 4196 marr += n_B*pcbddc->local_primal_size; 4197 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4198 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D)); 4199 } 4200 } else { 4201 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4202 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4203 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4204 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4205 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4206 } 4207 } 4208 } 4209 4210 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4211 p0_lidx_I = NULL; 4212 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4213 const PetscInt *idxs; 4214 4215 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 4216 PetscCall(PetscMalloc1(pcbddc->benign_n,&p0_lidx_I)); 4217 for (i=0;i<pcbddc->benign_n;i++) { 4218 PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i])); 4219 } 4220 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 4221 } 4222 4223 /* vertices */ 4224 if (n_vertices) { 4225 PetscBool restoreavr = PETSC_FALSE; 4226 4227 PetscCall(MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV)); 4228 4229 if (n_R) { 4230 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4231 PetscBLASInt B_N,B_one = 1; 4232 const PetscScalar *x; 4233 PetscScalar *y; 4234 4235 PetscCall(MatScale(A_RV,m_one)); 4236 if (need_benign_correction) { 4237 ISLocalToGlobalMapping RtoN; 4238 IS is_p0; 4239 PetscInt *idxs_p0,n; 4240 4241 PetscCall(PetscMalloc1(pcbddc->benign_n,&idxs_p0)); 4242 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN)); 4243 PetscCall(ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0)); 4244 PetscCheck(n == pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT,n,pcbddc->benign_n); 4245 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4246 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0)); 4247 PetscCall(MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr)); 4248 PetscCall(ISDestroy(&is_p0)); 4249 } 4250 4251 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV)); 4252 if (!sparserhs || need_benign_correction) { 4253 if (lda_rhs == n_R) { 4254 PetscCall(MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV)); 4255 } else { 4256 PetscScalar *av,*array; 4257 const PetscInt *xadj,*adjncy; 4258 PetscInt n; 4259 PetscBool flg_row; 4260 4261 array = work+lda_rhs*n_vertices; 4262 PetscCall(PetscArrayzero(array,lda_rhs*n_vertices)); 4263 PetscCall(MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV)); 4264 PetscCall(MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4265 PetscCall(MatSeqAIJGetArray(A_RV,&av)); 4266 for (i=0;i<n;i++) { 4267 PetscInt j; 4268 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4269 } 4270 PetscCall(MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4271 PetscCall(MatDestroy(&A_RV)); 4272 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV)); 4273 } 4274 if (need_benign_correction) { 4275 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4276 PetscScalar *marr; 4277 4278 PetscCall(MatDenseGetArray(A_RV,&marr)); 4279 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4280 4281 | 0 0 0 | (V) 4282 L = | 0 0 -1 | (P-p0) 4283 | 0 0 -1 | (p0) 4284 4285 */ 4286 for (i=0;i<reuse_solver->benign_n;i++) { 4287 const PetscScalar *vals; 4288 const PetscInt *idxs,*idxs_zero; 4289 PetscInt n,j,nz; 4290 4291 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4292 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4293 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4294 for (j=0;j<n;j++) { 4295 PetscScalar val = vals[j]; 4296 PetscInt k,col = idxs[j]; 4297 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4298 } 4299 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4300 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4301 } 4302 PetscCall(MatDenseRestoreArray(A_RV,&marr)); 4303 } 4304 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4305 Brhs = A_RV; 4306 } else { 4307 Mat tA_RVT,A_RVT; 4308 4309 if (!pcbddc->symmetric_primal) { 4310 /* A_RV already scaled by -1 */ 4311 PetscCall(MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT)); 4312 } else { 4313 restoreavr = PETSC_TRUE; 4314 PetscCall(MatScale(A_VR,-1.0)); 4315 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4316 A_RVT = A_VR; 4317 } 4318 if (lda_rhs != n_R) { 4319 PetscScalar *aa; 4320 PetscInt r,*ii,*jj; 4321 PetscBool done; 4322 4323 PetscCall(MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4324 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4325 PetscCall(MatSeqAIJGetArray(A_RVT,&aa)); 4326 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT)); 4327 PetscCall(MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4328 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4329 } else { 4330 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4331 tA_RVT = A_RVT; 4332 } 4333 PetscCall(MatCreateTranspose(tA_RVT,&Brhs)); 4334 PetscCall(MatDestroy(&tA_RVT)); 4335 PetscCall(MatDestroy(&A_RVT)); 4336 } 4337 if (F) { 4338 /* need to correct the rhs */ 4339 if (need_benign_correction) { 4340 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4341 PetscScalar *marr; 4342 4343 PetscCall(MatDenseGetArray(Brhs,&marr)); 4344 if (lda_rhs != n_R) { 4345 for (i=0;i<n_vertices;i++) { 4346 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4347 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE)); 4348 PetscCall(VecResetArray(dummy_vec)); 4349 } 4350 } else { 4351 for (i=0;i<n_vertices;i++) { 4352 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4353 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE)); 4354 PetscCall(VecResetArray(pcbddc->vec1_R)); 4355 } 4356 } 4357 PetscCall(MatDenseRestoreArray(Brhs,&marr)); 4358 } 4359 PetscCall(MatMatSolve(F,Brhs,A_RRmA_RV)); 4360 if (restoreavr) PetscCall(MatScale(A_VR,-1.0)); 4361 /* need to correct the solution */ 4362 if (need_benign_correction) { 4363 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4364 PetscScalar *marr; 4365 4366 PetscCall(MatDenseGetArray(A_RRmA_RV,&marr)); 4367 if (lda_rhs != n_R) { 4368 for (i=0;i<n_vertices;i++) { 4369 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4370 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4371 PetscCall(VecResetArray(dummy_vec)); 4372 } 4373 } else { 4374 for (i=0;i<n_vertices;i++) { 4375 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4376 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4377 PetscCall(VecResetArray(pcbddc->vec1_R)); 4378 } 4379 } 4380 PetscCall(MatDenseRestoreArray(A_RRmA_RV,&marr)); 4381 } 4382 } else { 4383 PetscCall(MatDenseGetArray(Brhs,&y)); 4384 for (i=0;i<n_vertices;i++) { 4385 PetscCall(VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs)); 4386 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs)); 4387 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4388 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4389 PetscCall(VecResetArray(pcbddc->vec1_R)); 4390 PetscCall(VecResetArray(pcbddc->vec2_R)); 4391 } 4392 PetscCall(MatDenseRestoreArray(Brhs,&y)); 4393 } 4394 PetscCall(MatDestroy(&A_RV)); 4395 PetscCall(MatDestroy(&Brhs)); 4396 /* S_VV and S_CV */ 4397 if (n_constraints) { 4398 Mat B; 4399 4400 PetscCall(PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices)); 4401 for (i=0;i<n_vertices;i++) { 4402 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4403 PetscCall(VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B)); 4404 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4405 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4406 PetscCall(VecResetArray(pcis->vec1_B)); 4407 PetscCall(VecResetArray(pcbddc->vec1_R)); 4408 } 4409 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B)); 4410 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4411 PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV)); 4412 PetscCall(MatProductSetType(S_CV,MATPRODUCT_AB)); 4413 PetscCall(MatProductSetFromOptions(S_CV)); 4414 PetscCall(MatProductSymbolic(S_CV)); 4415 PetscCall(MatProductNumeric(S_CV)); 4416 PetscCall(MatProductClear(S_CV)); 4417 4418 PetscCall(MatDestroy(&B)); 4419 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B)); 4420 /* Reuse B = local_auxmat2_R * S_CV */ 4421 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B)); 4422 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4423 PetscCall(MatProductSetFromOptions(B)); 4424 PetscCall(MatProductSymbolic(B)); 4425 PetscCall(MatProductNumeric(B)); 4426 4427 PetscCall(MatScale(S_CV,m_one)); 4428 PetscCall(PetscBLASIntCast(lda_rhs*n_vertices,&B_N)); 4429 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4430 PetscCall(MatDestroy(&B)); 4431 } 4432 if (lda_rhs != n_R) { 4433 PetscCall(MatDestroy(&A_RRmA_RV)); 4434 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV)); 4435 PetscCall(MatDenseSetLDA(A_RRmA_RV,lda_rhs)); 4436 } 4437 PetscCall(MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt)); 4438 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4439 if (need_benign_correction) { 4440 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4441 PetscScalar *marr,*sums; 4442 4443 PetscCall(PetscMalloc1(n_vertices,&sums)); 4444 PetscCall(MatDenseGetArray(S_VVt,&marr)); 4445 for (i=0;i<reuse_solver->benign_n;i++) { 4446 const PetscScalar *vals; 4447 const PetscInt *idxs,*idxs_zero; 4448 PetscInt n,j,nz; 4449 4450 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4451 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4452 for (j=0;j<n_vertices;j++) { 4453 PetscInt k; 4454 sums[j] = 0.; 4455 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4456 } 4457 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4458 for (j=0;j<n;j++) { 4459 PetscScalar val = vals[j]; 4460 PetscInt k; 4461 for (k=0;k<n_vertices;k++) { 4462 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4463 } 4464 } 4465 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4466 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4467 } 4468 PetscCall(PetscFree(sums)); 4469 PetscCall(MatDenseRestoreArray(S_VVt,&marr)); 4470 PetscCall(MatDestroy(&A_RV_bcorr)); 4471 } 4472 PetscCall(MatDestroy(&A_RRmA_RV)); 4473 PetscCall(PetscBLASIntCast(n_vertices*n_vertices,&B_N)); 4474 PetscCall(MatDenseGetArrayRead(A_VV,&x)); 4475 PetscCall(MatDenseGetArray(S_VVt,&y)); 4476 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4477 PetscCall(MatDenseRestoreArrayRead(A_VV,&x)); 4478 PetscCall(MatDenseRestoreArray(S_VVt,&y)); 4479 PetscCall(MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN)); 4480 PetscCall(MatDestroy(&S_VVt)); 4481 } else { 4482 PetscCall(MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN)); 4483 } 4484 PetscCall(MatDestroy(&A_VV)); 4485 4486 /* coarse basis functions */ 4487 for (i=0;i<n_vertices;i++) { 4488 Vec v; 4489 PetscScalar one = 1.0,zero = 0.0; 4490 4491 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4492 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i,&v)); 4493 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4494 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4495 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4496 PetscMPIInt rank; 4497 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),&rank)); 4498 PetscCheck(rank <= 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4499 } 4500 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4501 PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4502 PetscCall(VecAssemblyEnd(v)); 4503 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i,&v)); 4504 4505 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4506 PetscInt j; 4507 4508 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i,&v)); 4509 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4510 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4511 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4512 PetscMPIInt rank; 4513 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),&rank)); 4514 PetscCheck(rank <= 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4515 } 4516 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4517 PetscCall(VecAssemblyBegin(v)); 4518 PetscCall(VecAssemblyEnd(v)); 4519 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i,&v)); 4520 } 4521 PetscCall(VecResetArray(pcbddc->vec1_R)); 4522 } 4523 /* if n_R == 0 the object is not destroyed */ 4524 PetscCall(MatDestroy(&A_RV)); 4525 } 4526 PetscCall(VecDestroy(&dummy_vec)); 4527 4528 if (n_constraints) { 4529 Mat B; 4530 4531 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B)); 4532 PetscCall(MatScale(S_CC,m_one)); 4533 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B)); 4534 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4535 PetscCall(MatProductSetFromOptions(B)); 4536 PetscCall(MatProductSymbolic(B)); 4537 PetscCall(MatProductNumeric(B)); 4538 4539 PetscCall(MatScale(S_CC,m_one)); 4540 if (n_vertices) { 4541 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4542 PetscCall(MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC)); 4543 } else { 4544 Mat S_VCt; 4545 4546 if (lda_rhs != n_R) { 4547 PetscCall(MatDestroy(&B)); 4548 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B)); 4549 PetscCall(MatDenseSetLDA(B,lda_rhs)); 4550 } 4551 PetscCall(MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt)); 4552 PetscCall(MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN)); 4553 PetscCall(MatDestroy(&S_VCt)); 4554 } 4555 } 4556 PetscCall(MatDestroy(&B)); 4557 /* coarse basis functions */ 4558 for (i=0;i<n_constraints;i++) { 4559 Vec v; 4560 4561 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4562 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4563 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4564 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4565 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4566 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4567 PetscInt j; 4568 PetscScalar zero = 0.0; 4569 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4570 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4571 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4572 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4573 PetscCall(VecAssemblyBegin(v)); 4574 PetscCall(VecAssemblyEnd(v)); 4575 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4576 } 4577 PetscCall(VecResetArray(pcbddc->vec1_R)); 4578 } 4579 } 4580 if (n_constraints) { 4581 PetscCall(MatDestroy(&local_auxmat2_R)); 4582 } 4583 PetscCall(PetscFree(p0_lidx_I)); 4584 4585 /* coarse matrix entries relative to B_0 */ 4586 if (pcbddc->benign_n) { 4587 Mat B0_B,B0_BPHI; 4588 IS is_dummy; 4589 const PetscScalar *data; 4590 PetscInt j; 4591 4592 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4593 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4594 PetscCall(ISDestroy(&is_dummy)); 4595 PetscCall(MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4596 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4597 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data)); 4598 for (j=0;j<pcbddc->benign_n;j++) { 4599 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4600 for (i=0;i<pcbddc->local_primal_size;i++) { 4601 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4602 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4603 } 4604 } 4605 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data)); 4606 PetscCall(MatDestroy(&B0_B)); 4607 PetscCall(MatDestroy(&B0_BPHI)); 4608 } 4609 4610 /* compute other basis functions for non-symmetric problems */ 4611 if (!pcbddc->symmetric_primal) { 4612 Mat B_V=NULL,B_C=NULL; 4613 PetscScalar *marray; 4614 4615 if (n_constraints) { 4616 Mat S_CCT,C_CRT; 4617 4618 PetscCall(MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT)); 4619 PetscCall(MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT)); 4620 PetscCall(MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C)); 4621 PetscCall(MatDestroy(&S_CCT)); 4622 if (n_vertices) { 4623 Mat S_VCT; 4624 4625 PetscCall(MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT)); 4626 PetscCall(MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V)); 4627 PetscCall(MatDestroy(&S_VCT)); 4628 } 4629 PetscCall(MatDestroy(&C_CRT)); 4630 } else { 4631 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V)); 4632 } 4633 if (n_vertices && n_R) { 4634 PetscScalar *av,*marray; 4635 const PetscInt *xadj,*adjncy; 4636 PetscInt n; 4637 PetscBool flg_row; 4638 4639 /* B_V = B_V - A_VR^T */ 4640 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4641 PetscCall(MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4642 PetscCall(MatSeqAIJGetArray(A_VR,&av)); 4643 PetscCall(MatDenseGetArray(B_V,&marray)); 4644 for (i=0;i<n;i++) { 4645 PetscInt j; 4646 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4647 } 4648 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4649 PetscCall(MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4650 PetscCall(MatDestroy(&A_VR)); 4651 } 4652 4653 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4654 if (n_vertices) { 4655 PetscCall(MatDenseGetArray(B_V,&marray)); 4656 for (i=0;i<n_vertices;i++) { 4657 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+i*n_R)); 4658 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4659 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4660 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4661 PetscCall(VecResetArray(pcbddc->vec1_R)); 4662 PetscCall(VecResetArray(pcbddc->vec2_R)); 4663 } 4664 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4665 } 4666 if (B_C) { 4667 PetscCall(MatDenseGetArray(B_C,&marray)); 4668 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4669 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R)); 4670 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4671 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4672 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4673 PetscCall(VecResetArray(pcbddc->vec1_R)); 4674 PetscCall(VecResetArray(pcbddc->vec2_R)); 4675 } 4676 PetscCall(MatDenseRestoreArray(B_C,&marray)); 4677 } 4678 /* coarse basis functions */ 4679 for (i=0;i<pcbddc->local_primal_size;i++) { 4680 Vec v; 4681 4682 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*n_R)); 4683 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B,i,&v)); 4684 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4685 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4686 if (i<n_vertices) { 4687 PetscScalar one = 1.0; 4688 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4689 PetscCall(VecAssemblyBegin(v)); 4690 PetscCall(VecAssemblyEnd(v)); 4691 } 4692 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B,i,&v)); 4693 4694 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4695 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D,i,&v)); 4696 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4697 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4698 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D,i,&v)); 4699 } 4700 PetscCall(VecResetArray(pcbddc->vec1_R)); 4701 } 4702 PetscCall(MatDestroy(&B_V)); 4703 PetscCall(MatDestroy(&B_C)); 4704 } 4705 4706 /* free memory */ 4707 PetscCall(PetscFree(idx_V_B)); 4708 PetscCall(MatDestroy(&S_VV)); 4709 PetscCall(MatDestroy(&S_CV)); 4710 PetscCall(MatDestroy(&S_VC)); 4711 PetscCall(MatDestroy(&S_CC)); 4712 PetscCall(PetscFree(work)); 4713 if (n_vertices) { 4714 PetscCall(MatDestroy(&A_VR)); 4715 } 4716 if (n_constraints) { 4717 PetscCall(MatDestroy(&C_CR)); 4718 } 4719 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 4720 4721 /* Checking coarse_sub_mat and coarse basis functios */ 4722 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4723 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4724 if (pcbddc->dbg_flag) { 4725 Mat coarse_sub_mat; 4726 Mat AUXMAT,TM1,TM2,TM3,TM4; 4727 Mat coarse_phi_D,coarse_phi_B; 4728 Mat coarse_psi_D,coarse_psi_B; 4729 Mat A_II,A_BB,A_IB,A_BI; 4730 Mat C_B,CPHI; 4731 IS is_dummy; 4732 Vec mones; 4733 MatType checkmattype=MATSEQAIJ; 4734 PetscReal real_value; 4735 4736 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4737 Mat A; 4738 PetscCall(PCBDDCBenignProject(pc,NULL,NULL,&A)); 4739 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II)); 4740 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB)); 4741 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI)); 4742 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB)); 4743 PetscCall(MatDestroy(&A)); 4744 } else { 4745 PetscCall(MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II)); 4746 PetscCall(MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB)); 4747 PetscCall(MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI)); 4748 PetscCall(MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB)); 4749 } 4750 PetscCall(MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D)); 4751 PetscCall(MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B)); 4752 if (!pcbddc->symmetric_primal) { 4753 PetscCall(MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D)); 4754 PetscCall(MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B)); 4755 } 4756 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat)); 4757 4758 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 4759 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal)); 4760 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4761 if (!pcbddc->symmetric_primal) { 4762 PetscCall(MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4763 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1)); 4764 PetscCall(MatDestroy(&AUXMAT)); 4765 PetscCall(MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4766 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2)); 4767 PetscCall(MatDestroy(&AUXMAT)); 4768 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4769 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4770 PetscCall(MatDestroy(&AUXMAT)); 4771 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4772 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4773 PetscCall(MatDestroy(&AUXMAT)); 4774 } else { 4775 PetscCall(MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1)); 4776 PetscCall(MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2)); 4777 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4778 PetscCall(MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4779 PetscCall(MatDestroy(&AUXMAT)); 4780 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4781 PetscCall(MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4782 PetscCall(MatDestroy(&AUXMAT)); 4783 } 4784 PetscCall(MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN)); 4785 PetscCall(MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN)); 4786 PetscCall(MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN)); 4787 PetscCall(MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1)); 4788 if (pcbddc->benign_n) { 4789 Mat B0_B,B0_BPHI; 4790 const PetscScalar *data2; 4791 PetscScalar *data; 4792 PetscInt j; 4793 4794 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4795 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4796 PetscCall(MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4797 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4798 PetscCall(MatDenseGetArray(TM1,&data)); 4799 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data2)); 4800 for (j=0;j<pcbddc->benign_n;j++) { 4801 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4802 for (i=0;i<pcbddc->local_primal_size;i++) { 4803 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4804 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4805 } 4806 } 4807 PetscCall(MatDenseRestoreArray(TM1,&data)); 4808 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data2)); 4809 PetscCall(MatDestroy(&B0_B)); 4810 PetscCall(ISDestroy(&is_dummy)); 4811 PetscCall(MatDestroy(&B0_BPHI)); 4812 } 4813 #if 0 4814 { 4815 PetscViewer viewer; 4816 char filename[256]; 4817 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4818 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4819 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4820 PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4821 PetscCall(MatView(coarse_sub_mat,viewer)); 4822 PetscCall(PetscObjectSetName((PetscObject)TM1,"projected")); 4823 PetscCall(MatView(TM1,viewer)); 4824 if (pcbddc->coarse_phi_B) { 4825 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4826 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 4827 } 4828 if (pcbddc->coarse_phi_D) { 4829 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4830 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 4831 } 4832 if (pcbddc->coarse_psi_B) { 4833 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4834 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 4835 } 4836 if (pcbddc->coarse_psi_D) { 4837 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4838 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 4839 } 4840 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4841 PetscCall(MatView(pcbddc->local_mat,viewer)); 4842 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4843 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 4844 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4845 PetscCall(ISView(pcis->is_I_local,viewer)); 4846 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4847 PetscCall(ISView(pcis->is_B_local,viewer)); 4848 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4849 PetscCall(ISView(pcbddc->is_R_local,viewer)); 4850 PetscCall(PetscViewerDestroy(&viewer)); 4851 } 4852 #endif 4853 PetscCall(MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN)); 4854 PetscCall(MatNorm(TM1,NORM_FROBENIUS,&real_value)); 4855 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4856 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4857 4858 /* check constraints */ 4859 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy)); 4860 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 4861 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4862 PetscCall(MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4863 } else { 4864 PetscScalar *data; 4865 Mat tmat; 4866 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&data)); 4867 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat)); 4868 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B,&data)); 4869 PetscCall(MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4870 PetscCall(MatDestroy(&tmat)); 4871 } 4872 PetscCall(MatCreateVecs(CPHI,&mones,NULL)); 4873 PetscCall(VecSet(mones,-1.0)); 4874 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4875 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4876 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4877 if (!pcbddc->symmetric_primal) { 4878 PetscCall(MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI)); 4879 PetscCall(VecSet(mones,-1.0)); 4880 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4881 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4882 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4883 } 4884 PetscCall(MatDestroy(&C_B)); 4885 PetscCall(MatDestroy(&CPHI)); 4886 PetscCall(ISDestroy(&is_dummy)); 4887 PetscCall(VecDestroy(&mones)); 4888 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4889 PetscCall(MatDestroy(&A_II)); 4890 PetscCall(MatDestroy(&A_BB)); 4891 PetscCall(MatDestroy(&A_IB)); 4892 PetscCall(MatDestroy(&A_BI)); 4893 PetscCall(MatDestroy(&TM1)); 4894 PetscCall(MatDestroy(&TM2)); 4895 PetscCall(MatDestroy(&TM3)); 4896 PetscCall(MatDestroy(&TM4)); 4897 PetscCall(MatDestroy(&coarse_phi_D)); 4898 PetscCall(MatDestroy(&coarse_phi_B)); 4899 if (!pcbddc->symmetric_primal) { 4900 PetscCall(MatDestroy(&coarse_psi_D)); 4901 PetscCall(MatDestroy(&coarse_psi_B)); 4902 } 4903 PetscCall(MatDestroy(&coarse_sub_mat)); 4904 } 4905 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4906 { 4907 PetscBool gpu; 4908 4909 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu)); 4910 if (gpu) { 4911 if (pcbddc->local_auxmat1) { 4912 PetscCall(MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1)); 4913 } 4914 if (pcbddc->local_auxmat2) { 4915 PetscCall(MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2)); 4916 } 4917 if (pcbddc->coarse_phi_B) { 4918 PetscCall(MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B)); 4919 } 4920 if (pcbddc->coarse_phi_D) { 4921 PetscCall(MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D)); 4922 } 4923 if (pcbddc->coarse_psi_B) { 4924 PetscCall(MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B)); 4925 } 4926 if (pcbddc->coarse_psi_D) { 4927 PetscCall(MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D)); 4928 } 4929 } 4930 } 4931 /* get back data */ 4932 *coarse_submat_vals_n = coarse_submat_vals; 4933 PetscFunctionReturn(0); 4934 } 4935 4936 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4937 { 4938 Mat *work_mat; 4939 IS isrow_s,iscol_s; 4940 PetscBool rsorted,csorted; 4941 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4942 4943 PetscFunctionBegin; 4944 PetscCall(ISSorted(isrow,&rsorted)); 4945 PetscCall(ISSorted(iscol,&csorted)); 4946 PetscCall(ISGetLocalSize(isrow,&rsize)); 4947 PetscCall(ISGetLocalSize(iscol,&csize)); 4948 4949 if (!rsorted) { 4950 const PetscInt *idxs; 4951 PetscInt *idxs_sorted,i; 4952 4953 PetscCall(PetscMalloc1(rsize,&idxs_perm_r)); 4954 PetscCall(PetscMalloc1(rsize,&idxs_sorted)); 4955 for (i=0;i<rsize;i++) { 4956 idxs_perm_r[i] = i; 4957 } 4958 PetscCall(ISGetIndices(isrow,&idxs)); 4959 PetscCall(PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r)); 4960 for (i=0;i<rsize;i++) { 4961 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4962 } 4963 PetscCall(ISRestoreIndices(isrow,&idxs)); 4964 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s)); 4965 } else { 4966 PetscCall(PetscObjectReference((PetscObject)isrow)); 4967 isrow_s = isrow; 4968 } 4969 4970 if (!csorted) { 4971 if (isrow == iscol) { 4972 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 4973 iscol_s = isrow_s; 4974 } else { 4975 const PetscInt *idxs; 4976 PetscInt *idxs_sorted,i; 4977 4978 PetscCall(PetscMalloc1(csize,&idxs_perm_c)); 4979 PetscCall(PetscMalloc1(csize,&idxs_sorted)); 4980 for (i=0;i<csize;i++) { 4981 idxs_perm_c[i] = i; 4982 } 4983 PetscCall(ISGetIndices(iscol,&idxs)); 4984 PetscCall(PetscSortIntWithPermutation(csize,idxs,idxs_perm_c)); 4985 for (i=0;i<csize;i++) { 4986 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4987 } 4988 PetscCall(ISRestoreIndices(iscol,&idxs)); 4989 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s)); 4990 } 4991 } else { 4992 PetscCall(PetscObjectReference((PetscObject)iscol)); 4993 iscol_s = iscol; 4994 } 4995 4996 PetscCall(MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat)); 4997 4998 if (!rsorted || !csorted) { 4999 Mat new_mat; 5000 IS is_perm_r,is_perm_c; 5001 5002 if (!rsorted) { 5003 PetscInt *idxs_r,i; 5004 PetscCall(PetscMalloc1(rsize,&idxs_r)); 5005 for (i=0;i<rsize;i++) { 5006 idxs_r[idxs_perm_r[i]] = i; 5007 } 5008 PetscCall(PetscFree(idxs_perm_r)); 5009 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r)); 5010 } else { 5011 PetscCall(ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r)); 5012 } 5013 PetscCall(ISSetPermutation(is_perm_r)); 5014 5015 if (!csorted) { 5016 if (isrow_s == iscol_s) { 5017 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5018 is_perm_c = is_perm_r; 5019 } else { 5020 PetscInt *idxs_c,i; 5021 PetscCheck(idxs_perm_c,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5022 PetscCall(PetscMalloc1(csize,&idxs_c)); 5023 for (i=0;i<csize;i++) { 5024 idxs_c[idxs_perm_c[i]] = i; 5025 } 5026 PetscCall(PetscFree(idxs_perm_c)); 5027 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c)); 5028 } 5029 } else { 5030 PetscCall(ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c)); 5031 } 5032 PetscCall(ISSetPermutation(is_perm_c)); 5033 5034 PetscCall(MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat)); 5035 PetscCall(MatDestroy(&work_mat[0])); 5036 work_mat[0] = new_mat; 5037 PetscCall(ISDestroy(&is_perm_r)); 5038 PetscCall(ISDestroy(&is_perm_c)); 5039 } 5040 5041 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5042 *B = work_mat[0]; 5043 PetscCall(MatDestroyMatrices(1,&work_mat)); 5044 PetscCall(ISDestroy(&isrow_s)); 5045 PetscCall(ISDestroy(&iscol_s)); 5046 PetscFunctionReturn(0); 5047 } 5048 5049 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5050 { 5051 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5052 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5053 Mat new_mat,lA; 5054 IS is_local,is_global; 5055 PetscInt local_size; 5056 PetscBool isseqaij,issym,isset; 5057 5058 PetscFunctionBegin; 5059 PetscCall(MatDestroy(&pcbddc->local_mat)); 5060 PetscCall(MatGetSize(matis->A,&local_size,NULL)); 5061 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local)); 5062 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping,is_local,&is_global)); 5063 PetscCall(ISDestroy(&is_local)); 5064 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat)); 5065 PetscCall(ISDestroy(&is_global)); 5066 5067 if (pcbddc->dbg_flag) { 5068 Vec x,x_change; 5069 PetscReal error; 5070 5071 PetscCall(MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change)); 5072 PetscCall(VecSetRandom(x,NULL)); 5073 PetscCall(MatMult(ChangeOfBasisMatrix,x,x_change)); 5074 PetscCall(VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5075 PetscCall(VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5076 PetscCall(MatMult(new_mat,matis->x,matis->y)); 5077 if (!pcbddc->change_interior) { 5078 const PetscScalar *x,*y,*v; 5079 PetscReal lerror = 0.; 5080 PetscInt i; 5081 5082 PetscCall(VecGetArrayRead(matis->x,&x)); 5083 PetscCall(VecGetArrayRead(matis->y,&y)); 5084 PetscCall(VecGetArrayRead(matis->counter,&v)); 5085 for (i=0;i<local_size;i++) 5086 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5087 lerror = PetscAbsScalar(x[i]-y[i]); 5088 PetscCall(VecRestoreArrayRead(matis->x,&x)); 5089 PetscCall(VecRestoreArrayRead(matis->y,&y)); 5090 PetscCall(VecRestoreArrayRead(matis->counter,&v)); 5091 PetscCall(MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)pc))); 5092 if (error > PETSC_SMALL) { 5093 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5094 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",(double)error); 5095 } else { 5096 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",(double)error); 5097 } 5098 } 5099 } 5100 PetscCall(VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5101 PetscCall(VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5102 PetscCall(VecAXPY(x,-1.0,x_change)); 5103 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 5104 if (error > PETSC_SMALL) { 5105 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5106 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",(double)error); 5107 } else { 5108 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",(double)error); 5109 } 5110 } 5111 PetscCall(VecDestroy(&x)); 5112 PetscCall(VecDestroy(&x_change)); 5113 } 5114 5115 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5116 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA)); 5117 5118 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5119 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij)); 5120 if (isseqaij) { 5121 PetscCall(MatDestroy(&pcbddc->local_mat)); 5122 PetscCall(MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5123 if (lA) { 5124 Mat work; 5125 PetscCall(MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5126 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5127 PetscCall(MatDestroy(&work)); 5128 } 5129 } else { 5130 Mat work_mat; 5131 5132 PetscCall(MatDestroy(&pcbddc->local_mat)); 5133 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5134 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5135 PetscCall(MatDestroy(&work_mat)); 5136 if (lA) { 5137 Mat work; 5138 PetscCall(MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5139 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5140 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5141 PetscCall(MatDestroy(&work)); 5142 } 5143 } 5144 PetscCall(MatIsSymmetricKnown(matis->A,&isset,&issym)); 5145 if (isset) PetscCall(MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,issym)); 5146 PetscCall(MatDestroy(&new_mat)); 5147 PetscFunctionReturn(0); 5148 } 5149 5150 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5151 { 5152 PC_IS* pcis = (PC_IS*)(pc->data); 5153 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5154 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5155 PetscInt *idx_R_local=NULL; 5156 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5157 PetscInt vbs,bs; 5158 PetscBT bitmask=NULL; 5159 5160 PetscFunctionBegin; 5161 /* 5162 No need to setup local scatters if 5163 - primal space is unchanged 5164 AND 5165 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5166 AND 5167 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5168 */ 5169 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5170 PetscFunctionReturn(0); 5171 } 5172 /* destroy old objects */ 5173 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5174 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5175 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5176 /* Set Non-overlapping dimensions */ 5177 n_B = pcis->n_B; 5178 n_D = pcis->n - n_B; 5179 n_vertices = pcbddc->n_vertices; 5180 5181 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5182 5183 /* create auxiliary bitmask and allocate workspace */ 5184 if (!sub_schurs || !sub_schurs->reuse_solver) { 5185 PetscCall(PetscMalloc1(pcis->n-n_vertices,&idx_R_local)); 5186 PetscCall(PetscBTCreate(pcis->n,&bitmask)); 5187 for (i=0;i<n_vertices;i++) { 5188 PetscCall(PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i])); 5189 } 5190 5191 for (i=0, n_R=0; i<pcis->n; i++) { 5192 if (!PetscBTLookup(bitmask,i)) { 5193 idx_R_local[n_R++] = i; 5194 } 5195 } 5196 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5197 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5198 5199 PetscCall(ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5200 PetscCall(ISGetLocalSize(reuse_solver->is_R,&n_R)); 5201 } 5202 5203 /* Block code */ 5204 vbs = 1; 5205 PetscCall(MatGetBlockSize(pcbddc->local_mat,&bs)); 5206 if (bs>1 && !(n_vertices%bs)) { 5207 PetscBool is_blocked = PETSC_TRUE; 5208 PetscInt *vary; 5209 if (!sub_schurs || !sub_schurs->reuse_solver) { 5210 PetscCall(PetscMalloc1(pcis->n/bs,&vary)); 5211 PetscCall(PetscArrayzero(vary,pcis->n/bs)); 5212 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5213 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5214 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5215 for (i=0; i<pcis->n/bs; i++) { 5216 if (vary[i]!=0 && vary[i]!=bs) { 5217 is_blocked = PETSC_FALSE; 5218 break; 5219 } 5220 } 5221 PetscCall(PetscFree(vary)); 5222 } else { 5223 /* Verify directly the R set */ 5224 for (i=0; i<n_R/bs; i++) { 5225 PetscInt j,node=idx_R_local[bs*i]; 5226 for (j=1; j<bs; j++) { 5227 if (node != idx_R_local[bs*i+j]-j) { 5228 is_blocked = PETSC_FALSE; 5229 break; 5230 } 5231 } 5232 } 5233 } 5234 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5235 vbs = bs; 5236 for (i=0;i<n_R/vbs;i++) { 5237 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5238 } 5239 } 5240 } 5241 PetscCall(ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local)); 5242 if (sub_schurs && sub_schurs->reuse_solver) { 5243 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5244 5245 PetscCall(ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5246 PetscCall(ISDestroy(&reuse_solver->is_R)); 5247 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5248 reuse_solver->is_R = pcbddc->is_R_local; 5249 } else { 5250 PetscCall(PetscFree(idx_R_local)); 5251 } 5252 5253 /* print some info if requested */ 5254 if (pcbddc->dbg_flag) { 5255 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5256 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5257 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5258 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank)); 5259 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n",pcis->n,n_D,n_B)); 5260 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size)); 5261 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5262 } 5263 5264 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5265 if (!sub_schurs || !sub_schurs->reuse_solver) { 5266 IS is_aux1,is_aux2; 5267 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5268 5269 PetscCall(ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5270 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array1)); 5271 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array2)); 5272 PetscCall(ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5273 for (i=0; i<n_D; i++) { 5274 PetscCall(PetscBTSet(bitmask,is_indices[i])); 5275 } 5276 PetscCall(ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5277 for (i=0, j=0; i<n_R; i++) { 5278 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5279 aux_array1[j++] = i; 5280 } 5281 } 5282 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5283 PetscCall(ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5284 for (i=0, j=0; i<n_B; i++) { 5285 if (!PetscBTLookup(bitmask,is_indices[i])) { 5286 aux_array2[j++] = i; 5287 } 5288 } 5289 PetscCall(ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5290 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2)); 5291 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B)); 5292 PetscCall(ISDestroy(&is_aux1)); 5293 PetscCall(ISDestroy(&is_aux2)); 5294 5295 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5296 PetscCall(PetscMalloc1(n_D,&aux_array1)); 5297 for (i=0, j=0; i<n_R; i++) { 5298 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5299 aux_array1[j++] = i; 5300 } 5301 } 5302 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5303 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5304 PetscCall(ISDestroy(&is_aux1)); 5305 } 5306 PetscCall(PetscBTDestroy(&bitmask)); 5307 PetscCall(ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5308 } else { 5309 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5310 IS tis; 5311 PetscInt schur_size; 5312 5313 PetscCall(ISGetLocalSize(reuse_solver->is_B,&schur_size)); 5314 PetscCall(ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis)); 5315 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B)); 5316 PetscCall(ISDestroy(&tis)); 5317 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5318 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis)); 5319 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5320 PetscCall(ISDestroy(&tis)); 5321 } 5322 } 5323 PetscFunctionReturn(0); 5324 } 5325 5326 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5327 { 5328 MatNullSpace NullSpace; 5329 Mat dmat; 5330 const Vec *nullvecs; 5331 Vec v,v2,*nullvecs2; 5332 VecScatter sct = NULL; 5333 PetscContainer c; 5334 PetscScalar *ddata; 5335 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5336 PetscBool nnsp_has_cnst; 5337 5338 PetscFunctionBegin; 5339 if (!is && !B) { /* MATIS */ 5340 Mat_IS* matis = (Mat_IS*)A->data; 5341 5342 if (!B) { 5343 PetscCall(MatISGetLocalMat(A,&B)); 5344 } 5345 sct = matis->cctx; 5346 PetscCall(PetscObjectReference((PetscObject)sct)); 5347 } else { 5348 PetscCall(MatGetNullSpace(B,&NullSpace)); 5349 if (!NullSpace) { 5350 PetscCall(MatGetNearNullSpace(B,&NullSpace)); 5351 } 5352 if (NullSpace) PetscFunctionReturn(0); 5353 } 5354 PetscCall(MatGetNullSpace(A,&NullSpace)); 5355 if (!NullSpace) { 5356 PetscCall(MatGetNearNullSpace(A,&NullSpace)); 5357 } 5358 if (!NullSpace) PetscFunctionReturn(0); 5359 5360 PetscCall(MatCreateVecs(A,&v,NULL)); 5361 PetscCall(MatCreateVecs(B,&v2,NULL)); 5362 if (!sct) { 5363 PetscCall(VecScatterCreate(v,is,v2,NULL,&sct)); 5364 } 5365 PetscCall(MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs)); 5366 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5367 PetscCall(PetscMalloc1(bsiz,&nullvecs2)); 5368 PetscCall(VecGetBlockSize(v2,&bs)); 5369 PetscCall(VecGetSize(v2,&N)); 5370 PetscCall(VecGetLocalSize(v2,&n)); 5371 PetscCall(PetscMalloc1(n*bsiz,&ddata)); 5372 for (k=0;k<nnsp_size;k++) { 5373 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k])); 5374 PetscCall(VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5375 PetscCall(VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5376 } 5377 if (nnsp_has_cnst) { 5378 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size])); 5379 PetscCall(VecSet(nullvecs2[nnsp_size],1.0)); 5380 } 5381 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2)); 5382 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace)); 5383 5384 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat)); 5385 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B),&c)); 5386 PetscCall(PetscContainerSetPointer(c,ddata)); 5387 PetscCall(PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault)); 5388 PetscCall(PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c)); 5389 PetscCall(PetscContainerDestroy(&c)); 5390 PetscCall(PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat)); 5391 PetscCall(MatDestroy(&dmat)); 5392 5393 for (k=0;k<bsiz;k++) { 5394 PetscCall(VecDestroy(&nullvecs2[k])); 5395 } 5396 PetscCall(PetscFree(nullvecs2)); 5397 PetscCall(MatSetNearNullSpace(B,NullSpace)); 5398 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5399 PetscCall(VecDestroy(&v)); 5400 PetscCall(VecDestroy(&v2)); 5401 PetscCall(VecScatterDestroy(&sct)); 5402 PetscFunctionReturn(0); 5403 } 5404 5405 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5406 { 5407 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5408 PC_IS *pcis = (PC_IS*)pc->data; 5409 PC pc_temp; 5410 Mat A_RR; 5411 MatNullSpace nnsp; 5412 MatReuse reuse; 5413 PetscScalar m_one = -1.0; 5414 PetscReal value; 5415 PetscInt n_D,n_R; 5416 PetscBool issbaij,opts,isset,issym; 5417 void (*f)(void) = NULL; 5418 char dir_prefix[256],neu_prefix[256],str_level[16]; 5419 size_t len; 5420 5421 PetscFunctionBegin; 5422 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5423 /* approximate solver, propagate NearNullSpace if needed */ 5424 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5425 MatNullSpace gnnsp1,gnnsp2; 5426 PetscBool lhas,ghas; 5427 5428 PetscCall(MatGetNearNullSpace(pcbddc->local_mat,&nnsp)); 5429 PetscCall(MatGetNearNullSpace(pc->pmat,&gnnsp1)); 5430 PetscCall(MatGetNullSpace(pc->pmat,&gnnsp2)); 5431 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5432 PetscCall(MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 5433 if (!ghas && (gnnsp1 || gnnsp2)) { 5434 PetscCall(MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL)); 5435 } 5436 } 5437 5438 /* compute prefixes */ 5439 PetscCall(PetscStrcpy(dir_prefix,"")); 5440 PetscCall(PetscStrcpy(neu_prefix,"")); 5441 if (!pcbddc->current_level) { 5442 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix))); 5443 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix))); 5444 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5445 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5446 } else { 5447 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 5448 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 5449 len -= 15; /* remove "pc_bddc_coarse_" */ 5450 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5451 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5452 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5453 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1)); 5454 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1)); 5455 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5456 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5457 PetscCall(PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix))); 5458 PetscCall(PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix))); 5459 } 5460 5461 /* DIRICHLET PROBLEM */ 5462 if (dirichlet) { 5463 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5464 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5465 PetscCheck(sub_schurs && sub_schurs->reuse_solver,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5466 if (pcbddc->dbg_flag) { 5467 Mat A_IIn; 5468 5469 PetscCall(PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn)); 5470 PetscCall(MatDestroy(&pcis->A_II)); 5471 pcis->A_II = A_IIn; 5472 } 5473 } 5474 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat,&isset,&issym)); 5475 if (isset) PetscCall(MatSetOption(pcis->A_II,MAT_SYMMETRIC,issym)); 5476 5477 /* Matrix for Dirichlet problem is pcis->A_II */ 5478 n_D = pcis->n - pcis->n_B; 5479 opts = PETSC_FALSE; 5480 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5481 opts = PETSC_TRUE; 5482 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D)); 5483 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1)); 5484 /* default */ 5485 PetscCall(KSPSetType(pcbddc->ksp_D,KSPPREONLY)); 5486 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix)); 5487 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij)); 5488 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5489 if (issbaij) { 5490 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5491 } else { 5492 PetscCall(PCSetType(pc_temp,PCLU)); 5493 } 5494 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure)); 5495 } 5496 PetscCall(MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix)); 5497 PetscCall(KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II)); 5498 /* Allow user's customization */ 5499 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5500 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5501 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5502 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II)); 5503 } 5504 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5505 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5506 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5507 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5508 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5509 const PetscInt *idxs; 5510 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5511 5512 PetscCall(ISGetLocalSize(pcis->is_I_local,&nl)); 5513 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 5514 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5515 for (i=0;i<nl;i++) { 5516 for (d=0;d<cdim;d++) { 5517 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5518 } 5519 } 5520 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 5521 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5522 PetscCall(PetscFree(scoords)); 5523 } 5524 if (sub_schurs && sub_schurs->reuse_solver) { 5525 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5526 5527 PetscCall(KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver)); 5528 } 5529 5530 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5531 if (!n_D) { 5532 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5533 PetscCall(PCSetType(pc_temp,PCNONE)); 5534 } 5535 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5536 /* set ksp_D into pcis data */ 5537 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5538 PetscCall(KSPDestroy(&pcis->ksp_D)); 5539 pcis->ksp_D = pcbddc->ksp_D; 5540 } 5541 5542 /* NEUMANN PROBLEM */ 5543 A_RR = NULL; 5544 if (neumann) { 5545 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5546 PetscInt ibs,mbs; 5547 PetscBool issbaij, reuse_neumann_solver,isset,issym; 5548 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5549 5550 reuse_neumann_solver = PETSC_FALSE; 5551 if (sub_schurs && sub_schurs->reuse_solver) { 5552 IS iP; 5553 5554 reuse_neumann_solver = PETSC_TRUE; 5555 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP)); 5556 if (iP) reuse_neumann_solver = PETSC_FALSE; 5557 } 5558 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5559 PetscCall(ISGetSize(pcbddc->is_R_local,&n_R)); 5560 if (pcbddc->ksp_R) { /* already created ksp */ 5561 PetscInt nn_R; 5562 PetscCall(KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR)); 5563 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5564 PetscCall(MatGetSize(A_RR,&nn_R,NULL)); 5565 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5566 PetscCall(KSPReset(pcbddc->ksp_R)); 5567 PetscCall(MatDestroy(&A_RR)); 5568 reuse = MAT_INITIAL_MATRIX; 5569 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5570 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5571 PetscCall(MatDestroy(&A_RR)); 5572 reuse = MAT_INITIAL_MATRIX; 5573 } else { /* safe to reuse the matrix */ 5574 reuse = MAT_REUSE_MATRIX; 5575 } 5576 } 5577 /* last check */ 5578 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5579 PetscCall(MatDestroy(&A_RR)); 5580 reuse = MAT_INITIAL_MATRIX; 5581 } 5582 } else { /* first time, so we need to create the matrix */ 5583 reuse = MAT_INITIAL_MATRIX; 5584 } 5585 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5586 TODO: Get Rid of these conversions */ 5587 PetscCall(MatGetBlockSize(pcbddc->local_mat,&mbs)); 5588 PetscCall(ISGetBlockSize(pcbddc->is_R_local,&ibs)); 5589 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij)); 5590 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5591 if (matis->A == pcbddc->local_mat) { 5592 PetscCall(MatDestroy(&pcbddc->local_mat)); 5593 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5594 } else { 5595 PetscCall(MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5596 } 5597 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5598 if (matis->A == pcbddc->local_mat) { 5599 PetscCall(MatDestroy(&pcbddc->local_mat)); 5600 PetscCall(MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5601 } else { 5602 PetscCall(MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5603 } 5604 } 5605 /* extract A_RR */ 5606 if (reuse_neumann_solver) { 5607 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5608 5609 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5610 PetscCall(MatDestroy(&A_RR)); 5611 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5612 PetscCall(PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR)); 5613 } else { 5614 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR)); 5615 } 5616 } else { 5617 PetscCall(MatDestroy(&A_RR)); 5618 PetscCall(PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL)); 5619 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5620 } 5621 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5622 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR)); 5623 } 5624 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat,&isset,&issym)); 5625 if (isset) PetscCall(MatSetOption(A_RR,MAT_SYMMETRIC,issym)); 5626 opts = PETSC_FALSE; 5627 if (!pcbddc->ksp_R) { /* create object if not present */ 5628 opts = PETSC_TRUE; 5629 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R)); 5630 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1)); 5631 /* default */ 5632 PetscCall(KSPSetType(pcbddc->ksp_R,KSPPREONLY)); 5633 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix)); 5634 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5635 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij)); 5636 if (issbaij) { 5637 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5638 } else { 5639 PetscCall(PCSetType(pc_temp,PCLU)); 5640 } 5641 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure)); 5642 } 5643 PetscCall(KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR)); 5644 PetscCall(MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix)); 5645 if (opts) { /* Allow user's customization once */ 5646 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5647 } 5648 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5649 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5650 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR)); 5651 } 5652 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5653 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5654 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5655 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5656 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5657 const PetscInt *idxs; 5658 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5659 5660 PetscCall(ISGetLocalSize(pcbddc->is_R_local,&nl)); 5661 PetscCall(ISGetIndices(pcbddc->is_R_local,&idxs)); 5662 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5663 for (i=0;i<nl;i++) { 5664 for (d=0;d<cdim;d++) { 5665 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5666 } 5667 } 5668 PetscCall(ISRestoreIndices(pcbddc->is_R_local,&idxs)); 5669 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5670 PetscCall(PetscFree(scoords)); 5671 } 5672 5673 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5674 if (!n_R) { 5675 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5676 PetscCall(PCSetType(pc_temp,PCNONE)); 5677 } 5678 /* Reuse solver if it is present */ 5679 if (reuse_neumann_solver) { 5680 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5681 5682 PetscCall(KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver)); 5683 } 5684 PetscCall(KSPSetUp(pcbddc->ksp_R)); 5685 } 5686 5687 if (pcbddc->dbg_flag) { 5688 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5689 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5690 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5691 } 5692 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5693 5694 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5695 if (pcbddc->NullSpace_corr[0]) { 5696 PetscCall(PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE)); 5697 } 5698 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5699 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1])); 5700 } 5701 if (neumann && pcbddc->NullSpace_corr[2]) { 5702 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3])); 5703 } 5704 /* check Dirichlet and Neumann solvers */ 5705 if (pcbddc->dbg_flag) { 5706 if (dirichlet) { /* Dirichlet */ 5707 PetscCall(VecSetRandom(pcis->vec1_D,NULL)); 5708 PetscCall(MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D)); 5709 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D)); 5710 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 5711 PetscCall(VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D)); 5712 PetscCall(VecNorm(pcis->vec1_D,NORM_INFINITY,&value)); 5713 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,(double)value)); 5714 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5715 } 5716 if (neumann) { /* Neumann */ 5717 PetscCall(VecSetRandom(pcbddc->vec1_R,NULL)); 5718 PetscCall(MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R)); 5719 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R)); 5720 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 5721 PetscCall(VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R)); 5722 PetscCall(VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value)); 5723 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,(double)value)); 5724 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5725 } 5726 } 5727 /* free Neumann problem's matrix */ 5728 PetscCall(MatDestroy(&A_RR)); 5729 PetscFunctionReturn(0); 5730 } 5731 5732 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5733 { 5734 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5735 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5736 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5737 5738 PetscFunctionBegin; 5739 if (!reuse_solver) { 5740 PetscCall(VecSet(pcbddc->vec1_R,0.)); 5741 } 5742 if (!pcbddc->switch_static) { 5743 if (applytranspose && pcbddc->local_auxmat1) { 5744 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C)); 5745 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5746 } 5747 if (!reuse_solver) { 5748 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5749 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5750 } else { 5751 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5752 5753 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5754 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5755 } 5756 } else { 5757 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5758 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5759 PetscCall(VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5760 PetscCall(VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5761 if (applytranspose && pcbddc->local_auxmat1) { 5762 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C)); 5763 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5764 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5765 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5766 } 5767 } 5768 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5769 if (!reuse_solver || pcbddc->switch_static) { 5770 if (applytranspose) { 5771 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5772 } else { 5773 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5774 } 5775 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R)); 5776 } else { 5777 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5778 5779 if (applytranspose) { 5780 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5781 } else { 5782 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5783 } 5784 } 5785 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5786 PetscCall(VecSet(inout_B,0.)); 5787 if (!pcbddc->switch_static) { 5788 if (!reuse_solver) { 5789 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5790 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5791 } else { 5792 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5793 5794 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5795 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5796 } 5797 if (!applytranspose && pcbddc->local_auxmat1) { 5798 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5799 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B)); 5800 } 5801 } else { 5802 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5803 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5804 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5805 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5806 if (!applytranspose && pcbddc->local_auxmat1) { 5807 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5808 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R)); 5809 } 5810 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5811 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5812 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5813 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5814 } 5815 PetscFunctionReturn(0); 5816 } 5817 5818 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5819 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5820 { 5821 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5822 PC_IS* pcis = (PC_IS*) (pc->data); 5823 const PetscScalar zero = 0.0; 5824 5825 PetscFunctionBegin; 5826 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5827 if (!pcbddc->benign_apply_coarse_only) { 5828 if (applytranspose) { 5829 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P)); 5830 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5831 } else { 5832 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P)); 5833 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5834 } 5835 } else { 5836 PetscCall(VecSet(pcbddc->vec1_P,zero)); 5837 } 5838 5839 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5840 if (pcbddc->benign_n) { 5841 PetscScalar *array; 5842 PetscInt j; 5843 5844 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5845 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5846 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5847 } 5848 5849 /* start communications from local primal nodes to rhs of coarse solver */ 5850 PetscCall(VecSet(pcbddc->coarse_vec,zero)); 5851 PetscCall(PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD)); 5852 PetscCall(PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD)); 5853 5854 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5855 if (pcbddc->coarse_ksp) { 5856 Mat coarse_mat; 5857 Vec rhs,sol; 5858 MatNullSpace nullsp; 5859 PetscBool isbddc = PETSC_FALSE; 5860 5861 if (pcbddc->benign_have_null) { 5862 PC coarse_pc; 5863 5864 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5865 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 5866 /* we need to propagate to coarser levels the need for a possible benign correction */ 5867 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5868 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5869 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5870 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5871 } 5872 } 5873 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&rhs)); 5874 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&sol)); 5875 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 5876 if (applytranspose) { 5877 PetscCheck(!pcbddc->benign_apply_coarse_only,PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5878 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5879 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol)); 5880 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5881 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5882 PetscCall(MatGetTransposeNullSpace(coarse_mat,&nullsp)); 5883 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp,sol)); 5884 } else { 5885 PetscCall(MatGetNullSpace(coarse_mat,&nullsp)); 5886 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5887 PC coarse_pc; 5888 5889 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp,rhs)); 5890 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5891 PetscCall(PCPreSolve(coarse_pc,pcbddc->coarse_ksp)); 5892 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol)); 5893 PetscCall(PCPostSolve(coarse_pc,pcbddc->coarse_ksp)); 5894 } else { 5895 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5896 PetscCall(KSPSolve(pcbddc->coarse_ksp,rhs,sol)); 5897 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5898 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5899 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp,sol)); 5900 } 5901 } 5902 /* we don't need the benign correction at coarser levels anymore */ 5903 if (pcbddc->benign_have_null && isbddc) { 5904 PC coarse_pc; 5905 PC_BDDC* coarsepcbddc; 5906 5907 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5908 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5909 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5910 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5911 } 5912 } 5913 5914 /* Local solution on R nodes */ 5915 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5916 PetscCall(PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose)); 5917 } 5918 /* communications from coarse sol to local primal nodes */ 5919 PetscCall(PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE)); 5920 PetscCall(PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE)); 5921 5922 /* Sum contributions from the two levels */ 5923 if (!pcbddc->benign_apply_coarse_only) { 5924 if (applytranspose) { 5925 PetscCall(MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5926 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5927 } else { 5928 PetscCall(MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5929 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5930 } 5931 /* store p0 */ 5932 if (pcbddc->benign_n) { 5933 PetscScalar *array; 5934 PetscInt j; 5935 5936 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5937 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5938 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5939 } 5940 } else { /* expand the coarse solution */ 5941 if (applytranspose) { 5942 PetscCall(MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B)); 5943 } else { 5944 PetscCall(MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B)); 5945 } 5946 } 5947 PetscFunctionReturn(0); 5948 } 5949 5950 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5951 { 5952 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5953 Vec from,to; 5954 const PetscScalar *array; 5955 5956 PetscFunctionBegin; 5957 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5958 from = pcbddc->coarse_vec; 5959 to = pcbddc->vec1_P; 5960 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5961 Vec tvec; 5962 5963 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 5964 PetscCall(VecResetArray(tvec)); 5965 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&tvec)); 5966 PetscCall(VecGetArrayRead(tvec,&array)); 5967 PetscCall(VecPlaceArray(from,array)); 5968 PetscCall(VecRestoreArrayRead(tvec,&array)); 5969 } 5970 } else { /* from local to global -> put data in coarse right hand side */ 5971 from = pcbddc->vec1_P; 5972 to = pcbddc->coarse_vec; 5973 } 5974 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 5975 PetscFunctionReturn(0); 5976 } 5977 5978 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5979 { 5980 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5981 Vec from,to; 5982 const PetscScalar *array; 5983 5984 PetscFunctionBegin; 5985 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5986 from = pcbddc->coarse_vec; 5987 to = pcbddc->vec1_P; 5988 } else { /* from local to global -> put data in coarse right hand side */ 5989 from = pcbddc->vec1_P; 5990 to = pcbddc->coarse_vec; 5991 } 5992 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 5993 if (smode == SCATTER_FORWARD) { 5994 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5995 Vec tvec; 5996 5997 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 5998 PetscCall(VecGetArrayRead(to,&array)); 5999 PetscCall(VecPlaceArray(tvec,array)); 6000 PetscCall(VecRestoreArrayRead(to,&array)); 6001 } 6002 } else { 6003 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6004 PetscCall(VecResetArray(from)); 6005 } 6006 } 6007 PetscFunctionReturn(0); 6008 } 6009 6010 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6011 { 6012 PC_IS* pcis = (PC_IS*)(pc->data); 6013 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6014 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6015 /* one and zero */ 6016 PetscScalar one=1.0,zero=0.0; 6017 /* space to store constraints and their local indices */ 6018 PetscScalar *constraints_data; 6019 PetscInt *constraints_idxs,*constraints_idxs_B; 6020 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6021 PetscInt *constraints_n; 6022 /* iterators */ 6023 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6024 /* BLAS integers */ 6025 PetscBLASInt lwork,lierr; 6026 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6027 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6028 /* reuse */ 6029 PetscInt olocal_primal_size,olocal_primal_size_cc; 6030 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6031 /* change of basis */ 6032 PetscBool qr_needed; 6033 PetscBT change_basis,qr_needed_idx; 6034 /* auxiliary stuff */ 6035 PetscInt *nnz,*is_indices; 6036 PetscInt ncc; 6037 /* some quantities */ 6038 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6039 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6040 PetscReal tol; /* tolerance for retaining eigenmodes */ 6041 6042 PetscFunctionBegin; 6043 tol = PetscSqrtReal(PETSC_SMALL); 6044 /* Destroy Mat objects computed previously */ 6045 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6046 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6047 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6048 /* save info on constraints from previous setup (if any) */ 6049 olocal_primal_size = pcbddc->local_primal_size; 6050 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6051 PetscCall(PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult)); 6052 PetscCall(PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc)); 6053 PetscCall(PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc)); 6054 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 6055 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6056 6057 if (!pcbddc->adaptive_selection) { 6058 IS ISForVertices,*ISForFaces,*ISForEdges; 6059 MatNullSpace nearnullsp; 6060 const Vec *nearnullvecs; 6061 Vec *localnearnullsp; 6062 PetscScalar *array; 6063 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size,o_nf,o_ne; 6064 PetscBool nnsp_has_cnst; 6065 /* LAPACK working arrays for SVD or POD */ 6066 PetscBool skip_lapack,boolforchange; 6067 PetscScalar *work; 6068 PetscReal *singular_vals; 6069 #if defined(PETSC_USE_COMPLEX) 6070 PetscReal *rwork; 6071 #endif 6072 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6073 PetscBLASInt dummy_int=1; 6074 PetscScalar dummy_scalar=1.; 6075 PetscBool use_pod = PETSC_FALSE; 6076 6077 /* MKL SVD with same input gives different results on different processes! */ 6078 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6079 use_pod = PETSC_TRUE; 6080 #endif 6081 /* Get index sets for faces, edges and vertices from graph */ 6082 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices)); 6083 o_nf = n_ISForFaces; 6084 o_ne = n_ISForEdges; 6085 n_vertices = 0; 6086 if (ISForVertices) PetscCall(ISGetSize(ISForVertices,&n_vertices)); 6087 /* print some info */ 6088 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6089 6090 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 6091 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 6092 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6093 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6094 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n",PetscGlobalRank,n_vertices,pcbddc->use_vertices)); 6095 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges)); 6096 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces)); 6097 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6098 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6099 } 6100 6101 if (!pcbddc->use_vertices) n_vertices = 0; 6102 if (!pcbddc->use_edges) n_ISForEdges = 0; 6103 if (!pcbddc->use_faces) n_ISForFaces = 0; 6104 6105 /* check if near null space is attached to global mat */ 6106 if (pcbddc->use_nnsp) { 6107 PetscCall(MatGetNearNullSpace(pc->pmat,&nearnullsp)); 6108 } else nearnullsp = NULL; 6109 6110 if (nearnullsp) { 6111 PetscCall(MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs)); 6112 /* remove any stored info */ 6113 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6114 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6115 /* store information for BDDC solver reuse */ 6116 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6117 pcbddc->onearnullspace = nearnullsp; 6118 PetscCall(PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state)); 6119 for (i=0;i<nnsp_size;i++) { 6120 PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i])); 6121 } 6122 } else { /* if near null space is not provided BDDC uses constants by default */ 6123 nnsp_size = 0; 6124 nnsp_has_cnst = PETSC_TRUE; 6125 } 6126 /* get max number of constraints on a single cc */ 6127 max_constraints = nnsp_size; 6128 if (nnsp_has_cnst) max_constraints++; 6129 6130 /* 6131 Evaluate maximum storage size needed by the procedure 6132 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6133 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6134 There can be multiple constraints per connected component 6135 */ 6136 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6137 PetscCall(PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n)); 6138 6139 total_counts = n_ISForFaces+n_ISForEdges; 6140 total_counts *= max_constraints; 6141 total_counts += n_vertices; 6142 PetscCall(PetscBTCreate(total_counts,&change_basis)); 6143 6144 total_counts = 0; 6145 max_size_of_constraint = 0; 6146 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6147 IS used_is; 6148 if (i<n_ISForEdges) { 6149 used_is = ISForEdges[i]; 6150 } else { 6151 used_is = ISForFaces[i-n_ISForEdges]; 6152 } 6153 PetscCall(ISGetSize(used_is,&j)); 6154 total_counts += j; 6155 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6156 } 6157 PetscCall(PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B)); 6158 6159 /* get local part of global near null space vectors */ 6160 PetscCall(PetscMalloc1(nnsp_size,&localnearnullsp)); 6161 for (k=0;k<nnsp_size;k++) { 6162 PetscCall(VecDuplicate(pcis->vec1_N,&localnearnullsp[k])); 6163 PetscCall(VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6164 PetscCall(VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6165 } 6166 6167 /* whether or not to skip lapack calls */ 6168 skip_lapack = PETSC_TRUE; 6169 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6170 6171 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6172 if (!skip_lapack) { 6173 PetscScalar temp_work; 6174 6175 if (use_pod) { 6176 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6177 PetscCall(PetscMalloc1(max_constraints*max_constraints,&correlation_mat)); 6178 PetscCall(PetscMalloc1(max_constraints,&singular_vals)); 6179 PetscCall(PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis)); 6180 #if defined(PETSC_USE_COMPLEX) 6181 PetscCall(PetscMalloc1(3*max_constraints,&rwork)); 6182 #endif 6183 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6184 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6185 PetscCall(PetscBLASIntCast(max_constraints,&Blas_LDA)); 6186 lwork = -1; 6187 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6188 #if !defined(PETSC_USE_COMPLEX) 6189 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6190 #else 6191 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6192 #endif 6193 PetscCall(PetscFPTrapPop()); 6194 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6195 } else { 6196 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6197 /* SVD */ 6198 PetscInt max_n,min_n; 6199 max_n = max_size_of_constraint; 6200 min_n = max_constraints; 6201 if (max_size_of_constraint < max_constraints) { 6202 min_n = max_size_of_constraint; 6203 max_n = max_constraints; 6204 } 6205 PetscCall(PetscMalloc1(min_n,&singular_vals)); 6206 #if defined(PETSC_USE_COMPLEX) 6207 PetscCall(PetscMalloc1(5*min_n,&rwork)); 6208 #endif 6209 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6210 lwork = -1; 6211 PetscCall(PetscBLASIntCast(max_n,&Blas_M)); 6212 PetscCall(PetscBLASIntCast(min_n,&Blas_N)); 6213 PetscCall(PetscBLASIntCast(max_n,&Blas_LDA)); 6214 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6215 #if !defined(PETSC_USE_COMPLEX) 6216 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6217 #else 6218 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6219 #endif 6220 PetscCall(PetscFPTrapPop()); 6221 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6222 #else 6223 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6224 #endif /* on missing GESVD */ 6225 } 6226 /* Allocate optimal workspace */ 6227 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork)); 6228 PetscCall(PetscMalloc1(lwork,&work)); 6229 } 6230 /* Now we can loop on constraining sets */ 6231 total_counts = 0; 6232 constraints_idxs_ptr[0] = 0; 6233 constraints_data_ptr[0] = 0; 6234 /* vertices */ 6235 if (n_vertices) { 6236 PetscCall(ISGetIndices(ISForVertices,(const PetscInt**)&is_indices)); 6237 PetscCall(PetscArraycpy(constraints_idxs,is_indices,n_vertices)); 6238 for (i=0;i<n_vertices;i++) { 6239 constraints_n[total_counts] = 1; 6240 constraints_data[total_counts] = 1.0; 6241 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6242 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6243 total_counts++; 6244 } 6245 PetscCall(ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices)); 6246 } 6247 6248 /* edges and faces */ 6249 total_counts_cc = total_counts; 6250 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6251 IS used_is; 6252 PetscBool idxs_copied = PETSC_FALSE; 6253 6254 if (ncc<n_ISForEdges) { 6255 used_is = ISForEdges[ncc]; 6256 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6257 } else { 6258 used_is = ISForFaces[ncc-n_ISForEdges]; 6259 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6260 } 6261 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6262 6263 PetscCall(ISGetSize(used_is,&size_of_constraint)); 6264 if (!size_of_constraint) continue; 6265 PetscCall(ISGetIndices(used_is,(const PetscInt**)&is_indices)); 6266 /* change of basis should not be performed on local periodic nodes */ 6267 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6268 if (nnsp_has_cnst) { 6269 PetscScalar quad_value; 6270 6271 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6272 idxs_copied = PETSC_TRUE; 6273 6274 if (!pcbddc->use_nnsp_true) { 6275 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6276 } else { 6277 quad_value = 1.0; 6278 } 6279 for (j=0;j<size_of_constraint;j++) { 6280 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6281 } 6282 temp_constraints++; 6283 total_counts++; 6284 } 6285 for (k=0;k<nnsp_size;k++) { 6286 PetscReal real_value; 6287 PetscScalar *ptr_to_data; 6288 6289 PetscCall(VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6290 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6291 for (j=0;j<size_of_constraint;j++) { 6292 ptr_to_data[j] = array[is_indices[j]]; 6293 } 6294 PetscCall(VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6295 /* check if array is null on the connected component */ 6296 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6297 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6298 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6299 temp_constraints++; 6300 total_counts++; 6301 if (!idxs_copied) { 6302 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6303 idxs_copied = PETSC_TRUE; 6304 } 6305 } 6306 } 6307 PetscCall(ISRestoreIndices(used_is,(const PetscInt**)&is_indices)); 6308 valid_constraints = temp_constraints; 6309 if (!pcbddc->use_nnsp_true && temp_constraints) { 6310 if (temp_constraints == 1) { /* just normalize the constraint */ 6311 PetscScalar norm,*ptr_to_data; 6312 6313 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6314 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6315 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6316 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6317 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6318 } else { /* perform SVD */ 6319 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6320 6321 if (use_pod) { 6322 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6323 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6324 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6325 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6326 from that computed using LAPACKgesvd 6327 -> This is due to a different computation of eigenvectors in LAPACKheev 6328 -> The quality of the POD-computed basis will be the same */ 6329 PetscCall(PetscArrayzero(correlation_mat,temp_constraints*temp_constraints)); 6330 /* Store upper triangular part of correlation matrix */ 6331 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6332 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6333 for (j=0;j<temp_constraints;j++) { 6334 for (k=0;k<j+1;k++) { 6335 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6336 } 6337 } 6338 /* compute eigenvalues and eigenvectors of correlation matrix */ 6339 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6340 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDA)); 6341 #if !defined(PETSC_USE_COMPLEX) 6342 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6343 #else 6344 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6345 #endif 6346 PetscCall(PetscFPTrapPop()); 6347 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6348 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6349 j = 0; 6350 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6351 total_counts = total_counts-j; 6352 valid_constraints = temp_constraints-j; 6353 /* scale and copy POD basis into used quadrature memory */ 6354 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6355 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6356 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_K)); 6357 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6358 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDB)); 6359 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6360 if (j<temp_constraints) { 6361 PetscInt ii; 6362 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6363 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6364 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6365 PetscCall(PetscFPTrapPop()); 6366 for (k=0;k<temp_constraints-j;k++) { 6367 for (ii=0;ii<size_of_constraint;ii++) { 6368 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6369 } 6370 } 6371 } 6372 } else { 6373 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6374 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6375 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6376 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6377 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6378 #if !defined(PETSC_USE_COMPLEX) 6379 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6380 #else 6381 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6382 #endif 6383 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6384 PetscCall(PetscFPTrapPop()); 6385 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6386 k = temp_constraints; 6387 if (k > size_of_constraint) k = size_of_constraint; 6388 j = 0; 6389 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6390 valid_constraints = k-j; 6391 total_counts = total_counts-temp_constraints+valid_constraints; 6392 #else 6393 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6394 #endif /* on missing GESVD */ 6395 } 6396 } 6397 } 6398 /* update pointers information */ 6399 if (valid_constraints) { 6400 constraints_n[total_counts_cc] = valid_constraints; 6401 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6402 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6403 /* set change_of_basis flag */ 6404 if (boolforchange) { 6405 PetscBTSet(change_basis,total_counts_cc); 6406 } 6407 total_counts_cc++; 6408 } 6409 } 6410 /* free workspace */ 6411 if (!skip_lapack) { 6412 PetscCall(PetscFree(work)); 6413 #if defined(PETSC_USE_COMPLEX) 6414 PetscCall(PetscFree(rwork)); 6415 #endif 6416 PetscCall(PetscFree(singular_vals)); 6417 PetscCall(PetscFree(correlation_mat)); 6418 PetscCall(PetscFree(temp_basis)); 6419 } 6420 for (k=0;k<nnsp_size;k++) { 6421 PetscCall(VecDestroy(&localnearnullsp[k])); 6422 } 6423 PetscCall(PetscFree(localnearnullsp)); 6424 /* free index sets of faces, edges and vertices */ 6425 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,&o_nf,&ISForFaces,&o_ne,&ISForEdges,&ISForVertices)); 6426 } else { 6427 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6428 6429 total_counts = 0; 6430 n_vertices = 0; 6431 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6432 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 6433 } 6434 max_constraints = 0; 6435 total_counts_cc = 0; 6436 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6437 total_counts += pcbddc->adaptive_constraints_n[i]; 6438 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6439 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6440 } 6441 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6442 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6443 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6444 constraints_data = pcbddc->adaptive_constraints_data; 6445 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6446 PetscCall(PetscMalloc1(total_counts_cc,&constraints_n)); 6447 total_counts_cc = 0; 6448 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6449 if (pcbddc->adaptive_constraints_n[i]) { 6450 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6451 } 6452 } 6453 6454 max_size_of_constraint = 0; 6455 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6456 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B)); 6457 /* Change of basis */ 6458 PetscCall(PetscBTCreate(total_counts_cc,&change_basis)); 6459 if (pcbddc->use_change_of_basis) { 6460 for (i=0;i<sub_schurs->n_subs;i++) { 6461 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6462 PetscCall(PetscBTSet(change_basis,i+n_vertices)); 6463 } 6464 } 6465 } 6466 } 6467 pcbddc->local_primal_size = total_counts; 6468 PetscCall(PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs)); 6469 6470 /* map constraints_idxs in boundary numbering */ 6471 if (pcbddc->use_change_of_basis) { 6472 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B)); 6473 PetscCheck(i == constraints_idxs_ptr[total_counts_cc],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT,constraints_idxs_ptr[total_counts_cc],i); 6474 } 6475 6476 /* Create constraint matrix */ 6477 PetscCall(MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix)); 6478 PetscCall(MatSetType(pcbddc->ConstraintMatrix,MATAIJ)); 6479 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n)); 6480 6481 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6482 /* determine if a QR strategy is needed for change of basis */ 6483 qr_needed = pcbddc->use_qr_single; 6484 PetscCall(PetscBTCreate(total_counts_cc,&qr_needed_idx)); 6485 total_primal_vertices=0; 6486 pcbddc->local_primal_size_cc = 0; 6487 for (i=0;i<total_counts_cc;i++) { 6488 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6489 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6490 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6491 pcbddc->local_primal_size_cc += 1; 6492 } else if (PetscBTLookup(change_basis,i)) { 6493 for (k=0;k<constraints_n[i];k++) { 6494 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6495 } 6496 pcbddc->local_primal_size_cc += constraints_n[i]; 6497 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6498 PetscBTSet(qr_needed_idx,i); 6499 qr_needed = PETSC_TRUE; 6500 } 6501 } else { 6502 pcbddc->local_primal_size_cc += 1; 6503 } 6504 } 6505 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6506 pcbddc->n_vertices = total_primal_vertices; 6507 /* permute indices in order to have a sorted set of vertices */ 6508 PetscCall(PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs)); 6509 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult)); 6510 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices)); 6511 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6512 6513 /* nonzero structure of constraint matrix */ 6514 /* and get reference dof for local constraints */ 6515 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&nnz)); 6516 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6517 6518 j = total_primal_vertices; 6519 total_counts = total_primal_vertices; 6520 cum = total_primal_vertices; 6521 for (i=n_vertices;i<total_counts_cc;i++) { 6522 if (!PetscBTLookup(change_basis,i)) { 6523 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6524 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6525 cum++; 6526 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6527 for (k=0;k<constraints_n[i];k++) { 6528 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6529 nnz[j+k] = size_of_constraint; 6530 } 6531 j += constraints_n[i]; 6532 } 6533 } 6534 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz)); 6535 PetscCall(MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6536 PetscCall(PetscFree(nnz)); 6537 6538 /* set values in constraint matrix */ 6539 for (i=0;i<total_primal_vertices;i++) { 6540 PetscCall(MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES)); 6541 } 6542 total_counts = total_primal_vertices; 6543 for (i=n_vertices;i<total_counts_cc;i++) { 6544 if (!PetscBTLookup(change_basis,i)) { 6545 PetscInt *cols; 6546 6547 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6548 cols = constraints_idxs+constraints_idxs_ptr[i]; 6549 for (k=0;k<constraints_n[i];k++) { 6550 PetscInt row = total_counts+k; 6551 PetscScalar *vals; 6552 6553 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6554 PetscCall(MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES)); 6555 } 6556 total_counts += constraints_n[i]; 6557 } 6558 } 6559 /* assembling */ 6560 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6561 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6562 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view")); 6563 6564 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6565 if (pcbddc->use_change_of_basis) { 6566 /* dual and primal dofs on a single cc */ 6567 PetscInt dual_dofs,primal_dofs; 6568 /* working stuff for GEQRF */ 6569 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6570 PetscBLASInt lqr_work; 6571 /* working stuff for UNGQR */ 6572 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6573 PetscBLASInt lgqr_work; 6574 /* working stuff for TRTRS */ 6575 PetscScalar *trs_rhs = NULL; 6576 PetscBLASInt Blas_NRHS; 6577 /* pointers for values insertion into change of basis matrix */ 6578 PetscInt *start_rows,*start_cols; 6579 PetscScalar *start_vals; 6580 /* working stuff for values insertion */ 6581 PetscBT is_primal; 6582 PetscInt *aux_primal_numbering_B; 6583 /* matrix sizes */ 6584 PetscInt global_size,local_size; 6585 /* temporary change of basis */ 6586 Mat localChangeOfBasisMatrix; 6587 /* extra space for debugging */ 6588 PetscScalar *dbg_work = NULL; 6589 6590 PetscCall(MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix)); 6591 PetscCall(MatSetType(localChangeOfBasisMatrix,MATAIJ)); 6592 PetscCall(MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n)); 6593 /* nonzeros for local mat */ 6594 PetscCall(PetscMalloc1(pcis->n,&nnz)); 6595 if (!pcbddc->benign_change || pcbddc->fake_change) { 6596 for (i=0;i<pcis->n;i++) nnz[i]=1; 6597 } else { 6598 const PetscInt *ii; 6599 PetscInt n; 6600 PetscBool flg_row; 6601 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6602 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6603 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6604 } 6605 for (i=n_vertices;i<total_counts_cc;i++) { 6606 if (PetscBTLookup(change_basis,i)) { 6607 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6608 if (PetscBTLookup(qr_needed_idx,i)) { 6609 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6610 } else { 6611 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6612 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6613 } 6614 } 6615 } 6616 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz)); 6617 PetscCall(MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6618 PetscCall(PetscFree(nnz)); 6619 /* Set interior change in the matrix */ 6620 if (!pcbddc->benign_change || pcbddc->fake_change) { 6621 for (i=0;i<pcis->n;i++) { 6622 PetscCall(MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES)); 6623 } 6624 } else { 6625 const PetscInt *ii,*jj; 6626 PetscScalar *aa; 6627 PetscInt n; 6628 PetscBool flg_row; 6629 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6630 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change,&aa)); 6631 for (i=0;i<n;i++) { 6632 PetscCall(MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES)); 6633 } 6634 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change,&aa)); 6635 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6636 } 6637 6638 if (pcbddc->dbg_flag) { 6639 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6640 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank)); 6641 } 6642 6643 /* Now we loop on the constraints which need a change of basis */ 6644 /* 6645 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6646 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6647 6648 Basic blocks of change of basis matrix T computed: 6649 6650 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6651 6652 | 1 0 ... 0 s_1/S | 6653 | 0 1 ... 0 s_2/S | 6654 | ... | 6655 | 0 ... 1 s_{n-1}/S | 6656 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6657 6658 with S = \sum_{i=1}^n s_i^2 6659 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6660 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6661 6662 - QR decomposition of constraints otherwise 6663 */ 6664 if (qr_needed && max_size_of_constraint) { 6665 /* space to store Q */ 6666 PetscCall(PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis)); 6667 /* array to store scaling factors for reflectors */ 6668 PetscCall(PetscMalloc1(max_constraints,&qr_tau)); 6669 /* first we issue queries for optimal work */ 6670 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6671 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6672 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6673 lqr_work = -1; 6674 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6675 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6676 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work)); 6677 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work)); 6678 lgqr_work = -1; 6679 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6680 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_N)); 6681 PetscCall(PetscBLASIntCast(max_constraints,&Blas_K)); 6682 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6683 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6684 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6685 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6686 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work)); 6687 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work)); 6688 /* array to store rhs and solution of triangular solver */ 6689 PetscCall(PetscMalloc1(max_constraints*max_constraints,&trs_rhs)); 6690 /* allocating workspace for check */ 6691 if (pcbddc->dbg_flag) { 6692 PetscCall(PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work)); 6693 } 6694 } 6695 /* array to store whether a node is primal or not */ 6696 PetscCall(PetscBTCreate(pcis->n_B,&is_primal)); 6697 PetscCall(PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B)); 6698 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B)); 6699 PetscCheck(i == total_primal_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT,total_primal_vertices,i); 6700 for (i=0;i<total_primal_vertices;i++) { 6701 PetscCall(PetscBTSet(is_primal,aux_primal_numbering_B[i])); 6702 } 6703 PetscCall(PetscFree(aux_primal_numbering_B)); 6704 6705 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6706 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6707 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6708 if (PetscBTLookup(change_basis,total_counts)) { 6709 /* get constraint info */ 6710 primal_dofs = constraints_n[total_counts]; 6711 dual_dofs = size_of_constraint-primal_dofs; 6712 6713 if (pcbddc->dbg_flag) { 6714 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n",total_counts,primal_dofs,size_of_constraint)); 6715 } 6716 6717 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6718 6719 /* copy quadrature constraints for change of basis check */ 6720 if (pcbddc->dbg_flag) { 6721 PetscCall(PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6722 } 6723 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6724 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6725 6726 /* compute QR decomposition of constraints */ 6727 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6728 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6729 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6730 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6731 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6732 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6733 PetscCall(PetscFPTrapPop()); 6734 6735 /* explicitly compute R^-T */ 6736 PetscCall(PetscArrayzero(trs_rhs,primal_dofs*primal_dofs)); 6737 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6738 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6739 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_NRHS)); 6740 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6741 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6742 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6743 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6744 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6745 PetscCall(PetscFPTrapPop()); 6746 6747 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6748 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6749 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6750 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6751 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6752 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6753 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6754 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6755 PetscCall(PetscFPTrapPop()); 6756 6757 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6758 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6759 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6760 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6761 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6762 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6763 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6764 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6765 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6766 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6767 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6768 PetscCall(PetscFPTrapPop()); 6769 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6770 6771 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6772 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6773 /* insert cols for primal dofs */ 6774 for (j=0;j<primal_dofs;j++) { 6775 start_vals = &qr_basis[j*size_of_constraint]; 6776 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6777 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6778 } 6779 /* insert cols for dual dofs */ 6780 for (j=0,k=0;j<dual_dofs;k++) { 6781 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6782 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6783 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6784 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6785 j++; 6786 } 6787 } 6788 6789 /* check change of basis */ 6790 if (pcbddc->dbg_flag) { 6791 PetscInt ii,jj; 6792 PetscBool valid_qr=PETSC_TRUE; 6793 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_M)); 6794 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6795 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_K)); 6796 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6797 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDB)); 6798 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDC)); 6799 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6800 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6801 PetscCall(PetscFPTrapPop()); 6802 for (jj=0;jj<size_of_constraint;jj++) { 6803 for (ii=0;ii<primal_dofs;ii++) { 6804 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6805 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6806 } 6807 } 6808 if (!valid_qr) { 6809 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n")); 6810 for (jj=0;jj<size_of_constraint;jj++) { 6811 for (ii=0;ii<primal_dofs;ii++) { 6812 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6813 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n",jj,ii,(double)PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6814 } 6815 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6816 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n",jj,ii,(double)PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6817 } 6818 } 6819 } 6820 } else { 6821 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n")); 6822 } 6823 } 6824 } else { /* simple transformation block */ 6825 PetscInt row,col; 6826 PetscScalar val,norm; 6827 6828 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6829 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6830 for (j=0;j<size_of_constraint;j++) { 6831 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6832 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6833 if (!PetscBTLookup(is_primal,row_B)) { 6834 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6835 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES)); 6836 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES)); 6837 } else { 6838 for (k=0;k<size_of_constraint;k++) { 6839 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6840 if (row != col) { 6841 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6842 } else { 6843 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6844 } 6845 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES)); 6846 } 6847 } 6848 } 6849 if (pcbddc->dbg_flag) { 6850 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n")); 6851 } 6852 } 6853 } else { 6854 if (pcbddc->dbg_flag) { 6855 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n",total_counts,size_of_constraint)); 6856 } 6857 } 6858 } 6859 6860 /* free workspace */ 6861 if (qr_needed) { 6862 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 6863 PetscCall(PetscFree(trs_rhs)); 6864 PetscCall(PetscFree(qr_tau)); 6865 PetscCall(PetscFree(qr_work)); 6866 PetscCall(PetscFree(gqr_work)); 6867 PetscCall(PetscFree(qr_basis)); 6868 } 6869 PetscCall(PetscBTDestroy(&is_primal)); 6870 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6871 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6872 6873 /* assembling of global change of variable */ 6874 if (!pcbddc->fake_change) { 6875 Mat tmat; 6876 PetscInt bs; 6877 6878 PetscCall(VecGetSize(pcis->vec1_global,&global_size)); 6879 PetscCall(VecGetLocalSize(pcis->vec1_global,&local_size)); 6880 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat)); 6881 PetscCall(MatISSetLocalMat(tmat,localChangeOfBasisMatrix)); 6882 PetscCall(MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY)); 6883 PetscCall(MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY)); 6884 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix)); 6885 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ)); 6886 PetscCall(MatGetBlockSize(pc->pmat,&bs)); 6887 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs)); 6888 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size)); 6889 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE)); 6890 PetscCall(MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix)); 6891 PetscCall(MatDestroy(&tmat)); 6892 PetscCall(VecSet(pcis->vec1_global,0.0)); 6893 PetscCall(VecSet(pcis->vec1_N,1.0)); 6894 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6895 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6896 PetscCall(VecReciprocal(pcis->vec1_global)); 6897 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL)); 6898 6899 /* check */ 6900 if (pcbddc->dbg_flag) { 6901 PetscReal error; 6902 Vec x,x_change; 6903 6904 PetscCall(VecDuplicate(pcis->vec1_global,&x)); 6905 PetscCall(VecDuplicate(pcis->vec1_global,&x_change)); 6906 PetscCall(VecSetRandom(x,NULL)); 6907 PetscCall(VecCopy(x,pcis->vec1_global)); 6908 PetscCall(VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6909 PetscCall(VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6910 PetscCall(MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N)); 6911 PetscCall(VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6912 PetscCall(VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6913 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change)); 6914 PetscCall(VecAXPY(x,-1.0,x_change)); 6915 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 6916 if (error > PETSC_SMALL) { 6917 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",(double)error); 6918 } 6919 PetscCall(VecDestroy(&x)); 6920 PetscCall(VecDestroy(&x_change)); 6921 } 6922 /* adapt sub_schurs computed (if any) */ 6923 if (pcbddc->use_deluxe_scaling) { 6924 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6925 6926 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6927 if (sub_schurs && sub_schurs->S_Ej_all) { 6928 Mat S_new,tmat; 6929 IS is_all_N,is_V_Sall = NULL; 6930 6931 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N)); 6932 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat)); 6933 if (pcbddc->deluxe_zerorows) { 6934 ISLocalToGlobalMapping NtoSall; 6935 IS is_V; 6936 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V)); 6937 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall)); 6938 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall)); 6939 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 6940 PetscCall(ISDestroy(&is_V)); 6941 } 6942 PetscCall(ISDestroy(&is_all_N)); 6943 PetscCall(MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 6944 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 6945 PetscCall(PetscObjectReference((PetscObject)S_new)); 6946 if (pcbddc->deluxe_zerorows) { 6947 const PetscScalar *array; 6948 const PetscInt *idxs_V,*idxs_all; 6949 PetscInt i,n_V; 6950 6951 PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 6952 PetscCall(ISGetLocalSize(is_V_Sall,&n_V)); 6953 PetscCall(ISGetIndices(is_V_Sall,&idxs_V)); 6954 PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs_all)); 6955 PetscCall(VecGetArrayRead(pcis->D,&array)); 6956 for (i=0;i<n_V;i++) { 6957 PetscScalar val; 6958 PetscInt idx; 6959 6960 idx = idxs_V[i]; 6961 val = array[idxs_all[idxs_V[i]]]; 6962 PetscCall(MatSetValue(S_new,idx,idx,val,INSERT_VALUES)); 6963 } 6964 PetscCall(MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY)); 6965 PetscCall(MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY)); 6966 PetscCall(VecRestoreArrayRead(pcis->D,&array)); 6967 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all)); 6968 PetscCall(ISRestoreIndices(is_V_Sall,&idxs_V)); 6969 } 6970 sub_schurs->S_Ej_all = S_new; 6971 PetscCall(MatDestroy(&S_new)); 6972 if (sub_schurs->sum_S_Ej_all) { 6973 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 6974 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 6975 PetscCall(PetscObjectReference((PetscObject)S_new)); 6976 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 6977 sub_schurs->sum_S_Ej_all = S_new; 6978 PetscCall(MatDestroy(&S_new)); 6979 } 6980 PetscCall(ISDestroy(&is_V_Sall)); 6981 PetscCall(MatDestroy(&tmat)); 6982 } 6983 /* destroy any change of basis context in sub_schurs */ 6984 if (sub_schurs && sub_schurs->change) { 6985 PetscInt i; 6986 6987 for (i=0;i<sub_schurs->n_subs;i++) { 6988 PetscCall(KSPDestroy(&sub_schurs->change[i])); 6989 } 6990 PetscCall(PetscFree(sub_schurs->change)); 6991 } 6992 } 6993 if (pcbddc->switch_static) { /* need to save the local change */ 6994 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6995 } else { 6996 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 6997 } 6998 /* determine if any process has changed the pressures locally */ 6999 pcbddc->change_interior = pcbddc->benign_have_null; 7000 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7001 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7002 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7003 pcbddc->use_qr_single = qr_needed; 7004 } 7005 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7006 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7007 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7008 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7009 } else { 7010 Mat benign_global = NULL; 7011 if (pcbddc->benign_have_null) { 7012 Mat M; 7013 7014 pcbddc->change_interior = PETSC_TRUE; 7015 PetscCall(VecCopy(matis->counter,pcis->vec1_N)); 7016 PetscCall(VecReciprocal(pcis->vec1_N)); 7017 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global)); 7018 if (pcbddc->benign_change) { 7019 PetscCall(MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M)); 7020 PetscCall(MatDiagonalScale(M,pcis->vec1_N,NULL)); 7021 } else { 7022 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M)); 7023 PetscCall(MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES)); 7024 } 7025 PetscCall(MatISSetLocalMat(benign_global,M)); 7026 PetscCall(MatDestroy(&M)); 7027 PetscCall(MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY)); 7028 PetscCall(MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY)); 7029 } 7030 if (pcbddc->user_ChangeOfBasisMatrix) { 7031 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix)); 7032 PetscCall(MatDestroy(&benign_global)); 7033 } else if (pcbddc->benign_have_null) { 7034 pcbddc->ChangeOfBasisMatrix = benign_global; 7035 } 7036 } 7037 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7038 IS is_global; 7039 const PetscInt *gidxs; 7040 7041 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping,&gidxs)); 7042 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global)); 7043 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping,&gidxs)); 7044 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change)); 7045 PetscCall(ISDestroy(&is_global)); 7046 } 7047 } 7048 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7049 PetscCall(VecDuplicate(pcis->vec1_global,&pcbddc->work_change)); 7050 } 7051 7052 if (!pcbddc->fake_change) { 7053 /* add pressure dofs to set of primal nodes for numbering purposes */ 7054 for (i=0;i<pcbddc->benign_n;i++) { 7055 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7056 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7057 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7058 pcbddc->local_primal_size_cc++; 7059 pcbddc->local_primal_size++; 7060 } 7061 7062 /* check if a new primal space has been introduced (also take into account benign trick) */ 7063 pcbddc->new_primal_space_local = PETSC_TRUE; 7064 if (olocal_primal_size == pcbddc->local_primal_size) { 7065 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7066 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7067 if (!pcbddc->new_primal_space_local) { 7068 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7069 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7070 } 7071 } 7072 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7073 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 7074 } 7075 PetscCall(PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult)); 7076 7077 /* flush dbg viewer */ 7078 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7079 7080 /* free workspace */ 7081 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7082 PetscCall(PetscBTDestroy(&change_basis)); 7083 if (!pcbddc->adaptive_selection) { 7084 PetscCall(PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n)); 7085 PetscCall(PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B)); 7086 } else { 7087 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n,pcbddc->adaptive_constraints_idxs_ptr,pcbddc->adaptive_constraints_data_ptr,pcbddc->adaptive_constraints_idxs,pcbddc->adaptive_constraints_data)); 7088 PetscCall(PetscFree(constraints_n)); 7089 PetscCall(PetscFree(constraints_idxs_B)); 7090 } 7091 PetscFunctionReturn(0); 7092 } 7093 7094 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7095 { 7096 ISLocalToGlobalMapping map; 7097 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7098 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7099 PetscInt i,N; 7100 PetscBool rcsr = PETSC_FALSE; 7101 7102 PetscFunctionBegin; 7103 if (pcbddc->recompute_topography) { 7104 pcbddc->graphanalyzed = PETSC_FALSE; 7105 /* Reset previously computed graph */ 7106 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7107 /* Init local Graph struct */ 7108 PetscCall(MatGetSize(pc->pmat,&N,NULL)); 7109 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&map,NULL)); 7110 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount)); 7111 7112 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7113 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local)); 7114 } 7115 /* Check validity of the csr graph passed in by the user */ 7116 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7117 7118 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7119 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7120 PetscInt *xadj,*adjncy; 7121 PetscInt nvtxs; 7122 PetscBool flg_row=PETSC_FALSE; 7123 7124 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7125 if (flg_row) { 7126 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES)); 7127 pcbddc->computed_rowadj = PETSC_TRUE; 7128 } 7129 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7130 rcsr = PETSC_TRUE; 7131 } 7132 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7133 7134 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7135 PetscReal *lcoords; 7136 PetscInt n; 7137 MPI_Datatype dimrealtype; 7138 7139 /* TODO: support for blocked */ 7140 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7141 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7142 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords)); 7143 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype)); 7144 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7145 PetscCall(PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7146 PetscCall(PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7147 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7148 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7149 7150 pcbddc->mat_graph->coords = lcoords; 7151 pcbddc->mat_graph->cloc = PETSC_TRUE; 7152 pcbddc->mat_graph->cnloc = n; 7153 } 7154 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7155 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7156 7157 /* Setup of Graph */ 7158 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7159 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local)); 7160 7161 /* attach info on disconnected subdomains if present */ 7162 if (pcbddc->n_local_subs) { 7163 PetscInt *local_subs,n,totn; 7164 7165 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7166 PetscCall(PetscMalloc1(n,&local_subs)); 7167 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7168 for (i=0;i<pcbddc->n_local_subs;i++) { 7169 const PetscInt *idxs; 7170 PetscInt nl,j; 7171 7172 PetscCall(ISGetLocalSize(pcbddc->local_subs[i],&nl)); 7173 PetscCall(ISGetIndices(pcbddc->local_subs[i],&idxs)); 7174 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7175 PetscCall(ISRestoreIndices(pcbddc->local_subs[i],&idxs)); 7176 } 7177 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7178 pcbddc->mat_graph->n_local_subs = totn + 1; 7179 pcbddc->mat_graph->local_subs = local_subs; 7180 } 7181 } 7182 7183 if (!pcbddc->graphanalyzed) { 7184 /* Graph's connected components analysis */ 7185 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7186 pcbddc->graphanalyzed = PETSC_TRUE; 7187 pcbddc->corner_selected = pcbddc->corner_selection; 7188 } 7189 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7190 PetscFunctionReturn(0); 7191 } 7192 7193 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7194 { 7195 PetscInt i,j,n; 7196 PetscScalar *alphas; 7197 PetscReal norm,*onorms; 7198 7199 PetscFunctionBegin; 7200 n = *nio; 7201 if (!n) PetscFunctionReturn(0); 7202 PetscCall(PetscMalloc2(n,&alphas,n,&onorms)); 7203 PetscCall(VecNormalize(vecs[0],&norm)); 7204 if (norm < PETSC_SMALL) { 7205 onorms[0] = 0.0; 7206 PetscCall(VecSet(vecs[0],0.0)); 7207 } else { 7208 onorms[0] = norm; 7209 } 7210 7211 for (i=1;i<n;i++) { 7212 PetscCall(VecMDot(vecs[i],i,vecs,alphas)); 7213 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7214 PetscCall(VecMAXPY(vecs[i],i,alphas,vecs)); 7215 PetscCall(VecNormalize(vecs[i],&norm)); 7216 if (norm < PETSC_SMALL) { 7217 onorms[i] = 0.0; 7218 PetscCall(VecSet(vecs[i],0.0)); 7219 } else { 7220 onorms[i] = norm; 7221 } 7222 } 7223 /* push nonzero vectors at the beginning */ 7224 for (i=0;i<n;i++) { 7225 if (onorms[i] == 0.0) { 7226 for (j=i+1;j<n;j++) { 7227 if (onorms[j] != 0.0) { 7228 PetscCall(VecCopy(vecs[j],vecs[i])); 7229 onorms[j] = 0.0; 7230 } 7231 } 7232 } 7233 } 7234 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7235 PetscCall(PetscFree2(alphas,onorms)); 7236 PetscFunctionReturn(0); 7237 } 7238 7239 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7240 { 7241 ISLocalToGlobalMapping mapping; 7242 Mat A; 7243 PetscInt n_neighs,*neighs,*n_shared,**shared; 7244 PetscMPIInt size,rank,color; 7245 PetscInt *xadj,*adjncy; 7246 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7247 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7248 PetscInt void_procs,*procs_candidates = NULL; 7249 PetscInt xadj_count,*count; 7250 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7251 PetscSubcomm psubcomm; 7252 MPI_Comm subcomm; 7253 7254 PetscFunctionBegin; 7255 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7256 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7257 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7258 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7259 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7260 PetscCheck(*n_subdomains >0,PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %" PetscInt_FMT,*n_subdomains); 7261 7262 if (have_void) *have_void = PETSC_FALSE; 7263 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size)); 7264 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 7265 PetscCall(MatISGetLocalMat(mat,&A)); 7266 PetscCall(MatGetLocalSize(A,&n,NULL)); 7267 im_active = !!n; 7268 PetscCall(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat))); 7269 void_procs = size - active_procs; 7270 /* get ranks of of non-active processes in mat communicator */ 7271 if (void_procs) { 7272 PetscInt ncand; 7273 7274 if (have_void) *have_void = PETSC_TRUE; 7275 PetscCall(PetscMalloc1(size,&procs_candidates)); 7276 PetscCallMPI(MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 7277 for (i=0,ncand=0;i<size;i++) { 7278 if (!procs_candidates[i]) { 7279 procs_candidates[ncand++] = i; 7280 } 7281 } 7282 /* force n_subdomains to be not greater that the number of non-active processes */ 7283 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7284 } 7285 7286 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7287 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7288 PetscCall(MatGetSize(mat,&N,NULL)); 7289 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7290 PetscInt issize,isidx,dest; 7291 if (*n_subdomains == 1) dest = 0; 7292 else dest = rank; 7293 if (im_active) { 7294 issize = 1; 7295 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7296 isidx = procs_candidates[dest]; 7297 } else { 7298 isidx = dest; 7299 } 7300 } else { 7301 issize = 0; 7302 isidx = -1; 7303 } 7304 if (*n_subdomains != 1) *n_subdomains = active_procs; 7305 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends)); 7306 PetscCall(PetscFree(procs_candidates)); 7307 PetscFunctionReturn(0); 7308 } 7309 PetscCall(PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL)); 7310 PetscCall(PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL)); 7311 threshold = PetscMax(threshold,2); 7312 7313 /* Get info on mapping */ 7314 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7315 PetscCall(ISLocalToGlobalMappingGetInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7316 7317 /* build local CSR graph of subdomains' connectivity */ 7318 PetscCall(PetscMalloc1(2,&xadj)); 7319 xadj[0] = 0; 7320 xadj[1] = PetscMax(n_neighs-1,0); 7321 PetscCall(PetscMalloc1(xadj[1],&adjncy)); 7322 PetscCall(PetscMalloc1(xadj[1],&adjncy_wgt)); 7323 PetscCall(PetscCalloc1(n,&count)); 7324 for (i=1;i<n_neighs;i++) 7325 for (j=0;j<n_shared[i];j++) 7326 count[shared[i][j]] += 1; 7327 7328 xadj_count = 0; 7329 for (i=1;i<n_neighs;i++) { 7330 for (j=0;j<n_shared[i];j++) { 7331 if (count[shared[i][j]] < threshold) { 7332 adjncy[xadj_count] = neighs[i]; 7333 adjncy_wgt[xadj_count] = n_shared[i]; 7334 xadj_count++; 7335 break; 7336 } 7337 } 7338 } 7339 xadj[1] = xadj_count; 7340 PetscCall(PetscFree(count)); 7341 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7342 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7343 7344 PetscCall(PetscMalloc1(1,&ranks_send_to_idx)); 7345 7346 /* Restrict work on active processes only */ 7347 PetscCall(PetscMPIIntCast(im_active,&color)); 7348 if (void_procs) { 7349 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm)); 7350 PetscCall(PetscSubcommSetNumber(psubcomm,2)); /* 2 groups, active process and not active processes */ 7351 PetscCall(PetscSubcommSetTypeGeneral(psubcomm,color,rank)); 7352 subcomm = PetscSubcommChild(psubcomm); 7353 } else { 7354 psubcomm = NULL; 7355 subcomm = PetscObjectComm((PetscObject)mat); 7356 } 7357 7358 v_wgt = NULL; 7359 if (!color) { 7360 PetscCall(PetscFree(xadj)); 7361 PetscCall(PetscFree(adjncy)); 7362 PetscCall(PetscFree(adjncy_wgt)); 7363 } else { 7364 Mat subdomain_adj; 7365 IS new_ranks,new_ranks_contig; 7366 MatPartitioning partitioner; 7367 PetscInt rstart=0,rend=0; 7368 PetscInt *is_indices,*oldranks; 7369 PetscMPIInt size; 7370 PetscBool aggregate; 7371 7372 PetscCallMPI(MPI_Comm_size(subcomm,&size)); 7373 if (void_procs) { 7374 PetscInt prank = rank; 7375 PetscCall(PetscMalloc1(size,&oldranks)); 7376 PetscCallMPI(MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm)); 7377 for (i=0;i<xadj[1];i++) { 7378 PetscCall(PetscFindInt(adjncy[i],size,oldranks,&adjncy[i])); 7379 } 7380 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7381 } else { 7382 oldranks = NULL; 7383 } 7384 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7385 if (aggregate) { /* TODO: all this part could be made more efficient */ 7386 PetscInt lrows,row,ncols,*cols; 7387 PetscMPIInt nrank; 7388 PetscScalar *vals; 7389 7390 PetscCallMPI(MPI_Comm_rank(subcomm,&nrank)); 7391 lrows = 0; 7392 if (nrank<redprocs) { 7393 lrows = size/redprocs; 7394 if (nrank<size%redprocs) lrows++; 7395 } 7396 PetscCall(MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj)); 7397 PetscCall(MatGetOwnershipRange(subdomain_adj,&rstart,&rend)); 7398 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 7399 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 7400 row = nrank; 7401 ncols = xadj[1]-xadj[0]; 7402 cols = adjncy; 7403 PetscCall(PetscMalloc1(ncols,&vals)); 7404 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7405 PetscCall(MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES)); 7406 PetscCall(MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7407 PetscCall(MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7408 PetscCall(PetscFree(xadj)); 7409 PetscCall(PetscFree(adjncy)); 7410 PetscCall(PetscFree(adjncy_wgt)); 7411 PetscCall(PetscFree(vals)); 7412 if (use_vwgt) { 7413 Vec v; 7414 const PetscScalar *array; 7415 PetscInt nl; 7416 7417 PetscCall(MatCreateVecs(subdomain_adj,&v,NULL)); 7418 PetscCall(VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES)); 7419 PetscCall(VecAssemblyBegin(v)); 7420 PetscCall(VecAssemblyEnd(v)); 7421 PetscCall(VecGetLocalSize(v,&nl)); 7422 PetscCall(VecGetArrayRead(v,&array)); 7423 PetscCall(PetscMalloc1(nl,&v_wgt)); 7424 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7425 PetscCall(VecRestoreArrayRead(v,&array)); 7426 PetscCall(VecDestroy(&v)); 7427 } 7428 } else { 7429 PetscCall(MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj)); 7430 if (use_vwgt) { 7431 PetscCall(PetscMalloc1(1,&v_wgt)); 7432 v_wgt[0] = n; 7433 } 7434 } 7435 /* PetscCall(MatView(subdomain_adj,0)); */ 7436 7437 /* Partition */ 7438 PetscCall(MatPartitioningCreate(subcomm,&partitioner)); 7439 #if defined(PETSC_HAVE_PTSCOTCH) 7440 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH)); 7441 #elif defined(PETSC_HAVE_PARMETIS) 7442 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS)); 7443 #else 7444 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE)); 7445 #endif 7446 PetscCall(MatPartitioningSetAdjacency(partitioner,subdomain_adj)); 7447 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner,v_wgt)); 7448 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7449 PetscCall(MatPartitioningSetNParts(partitioner,*n_subdomains)); 7450 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7451 PetscCall(MatPartitioningApply(partitioner,&new_ranks)); 7452 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7453 7454 /* renumber new_ranks to avoid "holes" in new set of processors */ 7455 PetscCall(ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig)); 7456 PetscCall(ISDestroy(&new_ranks)); 7457 PetscCall(ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7458 if (!aggregate) { 7459 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7460 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7461 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7462 } else if (oldranks) { 7463 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7464 } else { 7465 ranks_send_to_idx[0] = is_indices[0]; 7466 } 7467 } else { 7468 PetscInt idx = 0; 7469 PetscMPIInt tag; 7470 MPI_Request *reqs; 7471 7472 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag)); 7473 PetscCall(PetscMalloc1(rend-rstart,&reqs)); 7474 for (i=rstart;i<rend;i++) { 7475 PetscCallMPI(MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart])); 7476 } 7477 PetscCallMPI(MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE)); 7478 PetscCallMPI(MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE)); 7479 PetscCall(PetscFree(reqs)); 7480 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7481 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7482 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7483 } else if (oldranks) { 7484 ranks_send_to_idx[0] = oldranks[idx]; 7485 } else { 7486 ranks_send_to_idx[0] = idx; 7487 } 7488 } 7489 PetscCall(ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7490 /* clean up */ 7491 PetscCall(PetscFree(oldranks)); 7492 PetscCall(ISDestroy(&new_ranks_contig)); 7493 PetscCall(MatDestroy(&subdomain_adj)); 7494 PetscCall(MatPartitioningDestroy(&partitioner)); 7495 } 7496 PetscCall(PetscSubcommDestroy(&psubcomm)); 7497 PetscCall(PetscFree(procs_candidates)); 7498 7499 /* assemble parallel IS for sends */ 7500 i = 1; 7501 if (!color) i=0; 7502 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends)); 7503 PetscFunctionReturn(0); 7504 } 7505 7506 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7507 7508 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7509 { 7510 Mat local_mat; 7511 IS is_sends_internal; 7512 PetscInt rows,cols,new_local_rows; 7513 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7514 PetscBool ismatis,isdense,newisdense,destroy_mat; 7515 ISLocalToGlobalMapping l2gmap; 7516 PetscInt* l2gmap_indices; 7517 const PetscInt* is_indices; 7518 MatType new_local_type; 7519 /* buffers */ 7520 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7521 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7522 PetscInt *recv_buffer_idxs_local; 7523 PetscScalar *ptr_vals,*recv_buffer_vals; 7524 const PetscScalar *send_buffer_vals; 7525 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7526 /* MPI */ 7527 MPI_Comm comm,comm_n; 7528 PetscSubcomm subcomm; 7529 PetscMPIInt n_sends,n_recvs,size; 7530 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7531 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7532 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7533 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7534 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7535 7536 PetscFunctionBegin; 7537 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7538 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7539 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7540 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7541 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7542 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7543 PetscValidLogicalCollectiveBool(mat,reuse,6); 7544 PetscValidLogicalCollectiveInt(mat,nis,8); 7545 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7546 if (nvecs) { 7547 PetscCheck(nvecs <= 1,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7548 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7549 } 7550 /* further checks */ 7551 PetscCall(MatISGetLocalMat(mat,&local_mat)); 7552 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense)); 7553 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7554 PetscCall(MatGetSize(local_mat,&rows,&cols)); 7555 PetscCheck(rows == cols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7556 if (reuse && *mat_n) { 7557 PetscInt mrows,mcols,mnrows,mncols; 7558 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7559 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis)); 7560 PetscCheck(ismatis,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7561 PetscCall(MatGetSize(mat,&mrows,&mcols)); 7562 PetscCall(MatGetSize(*mat_n,&mnrows,&mncols)); 7563 PetscCheck(mrows == mnrows,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT,mrows,mnrows); 7564 PetscCheck(mcols == mncols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT,mcols,mncols); 7565 } 7566 PetscCall(MatGetBlockSize(local_mat,&bs)); 7567 PetscValidLogicalCollectiveInt(mat,bs,1); 7568 7569 /* prepare IS for sending if not provided */ 7570 if (!is_sends) { 7571 PetscCheck(n_subdomains,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7572 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL)); 7573 } else { 7574 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7575 is_sends_internal = is_sends; 7576 } 7577 7578 /* get comm */ 7579 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 7580 7581 /* compute number of sends */ 7582 PetscCall(ISGetLocalSize(is_sends_internal,&i)); 7583 PetscCall(PetscMPIIntCast(i,&n_sends)); 7584 7585 /* compute number of receives */ 7586 PetscCallMPI(MPI_Comm_size(comm,&size)); 7587 PetscCall(PetscMalloc1(size,&iflags)); 7588 PetscCall(PetscArrayzero(iflags,size)); 7589 PetscCall(ISGetIndices(is_sends_internal,&is_indices)); 7590 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7591 PetscCall(PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs)); 7592 PetscCall(PetscFree(iflags)); 7593 7594 /* restrict comm if requested */ 7595 subcomm = NULL; 7596 destroy_mat = PETSC_FALSE; 7597 if (restrict_comm) { 7598 PetscMPIInt color,subcommsize; 7599 7600 color = 0; 7601 if (restrict_full) { 7602 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7603 } else { 7604 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7605 } 7606 PetscCall(MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm)); 7607 subcommsize = size - subcommsize; 7608 /* check if reuse has been requested */ 7609 if (reuse) { 7610 if (*mat_n) { 7611 PetscMPIInt subcommsize2; 7612 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2)); 7613 PetscCheck(subcommsize == subcommsize2,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7614 comm_n = PetscObjectComm((PetscObject)*mat_n); 7615 } else { 7616 comm_n = PETSC_COMM_SELF; 7617 } 7618 } else { /* MAT_INITIAL_MATRIX */ 7619 PetscMPIInt rank; 7620 7621 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 7622 PetscCall(PetscSubcommCreate(comm,&subcomm)); 7623 PetscCall(PetscSubcommSetNumber(subcomm,2)); 7624 PetscCall(PetscSubcommSetTypeGeneral(subcomm,color,rank)); 7625 comm_n = PetscSubcommChild(subcomm); 7626 } 7627 /* flag to destroy *mat_n if not significative */ 7628 if (color) destroy_mat = PETSC_TRUE; 7629 } else { 7630 comm_n = comm; 7631 } 7632 7633 /* prepare send/receive buffers */ 7634 PetscCall(PetscMalloc1(size,&ilengths_idxs)); 7635 PetscCall(PetscArrayzero(ilengths_idxs,size)); 7636 PetscCall(PetscMalloc1(size,&ilengths_vals)); 7637 PetscCall(PetscArrayzero(ilengths_vals,size)); 7638 if (nis) { 7639 PetscCall(PetscCalloc1(size,&ilengths_idxs_is)); 7640 } 7641 7642 /* Get data from local matrices */ 7643 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7644 /* TODO: See below some guidelines on how to prepare the local buffers */ 7645 /* 7646 send_buffer_vals should contain the raw values of the local matrix 7647 send_buffer_idxs should contain: 7648 - MatType_PRIVATE type 7649 - PetscInt size_of_l2gmap 7650 - PetscInt global_row_indices[size_of_l2gmap] 7651 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7652 */ 7653 { 7654 ISLocalToGlobalMapping mapping; 7655 7656 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7657 PetscCall(MatDenseGetArrayRead(local_mat,&send_buffer_vals)); 7658 PetscCall(ISLocalToGlobalMappingGetSize(mapping,&i)); 7659 PetscCall(PetscMalloc1(i+2,&send_buffer_idxs)); 7660 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7661 send_buffer_idxs[1] = i; 7662 PetscCall(ISLocalToGlobalMappingGetIndices(mapping,(const PetscInt**)&ptr_idxs)); 7663 PetscCall(PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i)); 7664 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping,(const PetscInt**)&ptr_idxs)); 7665 PetscCall(PetscMPIIntCast(i,&len)); 7666 for (i=0;i<n_sends;i++) { 7667 ilengths_vals[is_indices[i]] = len*len; 7668 ilengths_idxs[is_indices[i]] = len+2; 7669 } 7670 } 7671 PetscCall(PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals)); 7672 /* additional is (if any) */ 7673 if (nis) { 7674 PetscMPIInt psum; 7675 PetscInt j; 7676 for (j=0,psum=0;j<nis;j++) { 7677 PetscInt plen; 7678 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7679 PetscCall(PetscMPIIntCast(plen,&len)); 7680 psum += len+1; /* indices + length */ 7681 } 7682 PetscCall(PetscMalloc1(psum,&send_buffer_idxs_is)); 7683 for (j=0,psum=0;j<nis;j++) { 7684 PetscInt plen; 7685 const PetscInt *is_array_idxs; 7686 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7687 send_buffer_idxs_is[psum] = plen; 7688 PetscCall(ISGetIndices(isarray[j],&is_array_idxs)); 7689 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen)); 7690 PetscCall(ISRestoreIndices(isarray[j],&is_array_idxs)); 7691 psum += plen+1; /* indices + length */ 7692 } 7693 for (i=0;i<n_sends;i++) { 7694 ilengths_idxs_is[is_indices[i]] = psum; 7695 } 7696 PetscCall(PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is)); 7697 } 7698 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 7699 7700 buf_size_idxs = 0; 7701 buf_size_vals = 0; 7702 buf_size_idxs_is = 0; 7703 buf_size_vecs = 0; 7704 for (i=0;i<n_recvs;i++) { 7705 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7706 buf_size_vals += (PetscInt)olengths_vals[i]; 7707 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7708 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7709 } 7710 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs)); 7711 PetscCall(PetscMalloc1(buf_size_vals,&recv_buffer_vals)); 7712 PetscCall(PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is)); 7713 PetscCall(PetscMalloc1(buf_size_vecs,&recv_buffer_vecs)); 7714 7715 /* get new tags for clean communications */ 7716 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs)); 7717 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vals)); 7718 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is)); 7719 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vecs)); 7720 7721 /* allocate for requests */ 7722 PetscCall(PetscMalloc1(n_sends,&send_req_idxs)); 7723 PetscCall(PetscMalloc1(n_sends,&send_req_vals)); 7724 PetscCall(PetscMalloc1(n_sends,&send_req_idxs_is)); 7725 PetscCall(PetscMalloc1(n_sends,&send_req_vecs)); 7726 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs)); 7727 PetscCall(PetscMalloc1(n_recvs,&recv_req_vals)); 7728 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs_is)); 7729 PetscCall(PetscMalloc1(n_recvs,&recv_req_vecs)); 7730 7731 /* communications */ 7732 ptr_idxs = recv_buffer_idxs; 7733 ptr_vals = recv_buffer_vals; 7734 ptr_idxs_is = recv_buffer_idxs_is; 7735 ptr_vecs = recv_buffer_vecs; 7736 for (i=0;i<n_recvs;i++) { 7737 source_dest = onodes[i]; 7738 PetscCallMPI(MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i])); 7739 PetscCallMPI(MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i])); 7740 ptr_idxs += olengths_idxs[i]; 7741 ptr_vals += olengths_vals[i]; 7742 if (nis) { 7743 source_dest = onodes_is[i]; 7744 PetscCallMPI(MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i])); 7745 ptr_idxs_is += olengths_idxs_is[i]; 7746 } 7747 if (nvecs) { 7748 source_dest = onodes[i]; 7749 PetscCallMPI(MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i])); 7750 ptr_vecs += olengths_idxs[i]-2; 7751 } 7752 } 7753 for (i=0;i<n_sends;i++) { 7754 PetscCall(PetscMPIIntCast(is_indices[i],&source_dest)); 7755 PetscCallMPI(MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i])); 7756 PetscCallMPI(MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i])); 7757 if (nis) { 7758 PetscCallMPI(MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i])); 7759 } 7760 if (nvecs) { 7761 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 7762 PetscCallMPI(MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i])); 7763 } 7764 } 7765 PetscCall(ISRestoreIndices(is_sends_internal,&is_indices)); 7766 PetscCall(ISDestroy(&is_sends_internal)); 7767 7768 /* assemble new l2g map */ 7769 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE)); 7770 ptr_idxs = recv_buffer_idxs; 7771 new_local_rows = 0; 7772 for (i=0;i<n_recvs;i++) { 7773 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7774 ptr_idxs += olengths_idxs[i]; 7775 } 7776 PetscCall(PetscMalloc1(new_local_rows,&l2gmap_indices)); 7777 ptr_idxs = recv_buffer_idxs; 7778 new_local_rows = 0; 7779 for (i=0;i<n_recvs;i++) { 7780 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1))); 7781 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7782 ptr_idxs += olengths_idxs[i]; 7783 } 7784 PetscCall(PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices)); 7785 PetscCall(ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap)); 7786 PetscCall(PetscFree(l2gmap_indices)); 7787 7788 /* infer new local matrix type from received local matrices type */ 7789 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7790 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7791 if (n_recvs) { 7792 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7793 ptr_idxs = recv_buffer_idxs; 7794 for (i=0;i<n_recvs;i++) { 7795 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7796 new_local_type_private = MATAIJ_PRIVATE; 7797 break; 7798 } 7799 ptr_idxs += olengths_idxs[i]; 7800 } 7801 switch (new_local_type_private) { 7802 case MATDENSE_PRIVATE: 7803 new_local_type = MATSEQAIJ; 7804 bs = 1; 7805 break; 7806 case MATAIJ_PRIVATE: 7807 new_local_type = MATSEQAIJ; 7808 bs = 1; 7809 break; 7810 case MATBAIJ_PRIVATE: 7811 new_local_type = MATSEQBAIJ; 7812 break; 7813 case MATSBAIJ_PRIVATE: 7814 new_local_type = MATSEQSBAIJ; 7815 break; 7816 default: 7817 SETERRQ(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7818 } 7819 } else { /* by default, new_local_type is seqaij */ 7820 new_local_type = MATSEQAIJ; 7821 bs = 1; 7822 } 7823 7824 /* create MATIS object if needed */ 7825 if (!reuse) { 7826 PetscCall(MatGetSize(mat,&rows,&cols)); 7827 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7828 } else { 7829 /* it also destroys the local matrices */ 7830 if (*mat_n) { 7831 PetscCall(MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap)); 7832 } else { /* this is a fake object */ 7833 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7834 } 7835 } 7836 PetscCall(MatISGetLocalMat(*mat_n,&local_mat)); 7837 PetscCall(MatSetType(local_mat,new_local_type)); 7838 7839 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE)); 7840 7841 /* Global to local map of received indices */ 7842 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local)); /* needed for values insertion */ 7843 PetscCall(ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local)); 7844 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 7845 7846 /* restore attributes -> type of incoming data and its size */ 7847 buf_size_idxs = 0; 7848 for (i=0;i<n_recvs;i++) { 7849 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7850 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7851 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7852 } 7853 PetscCall(PetscFree(recv_buffer_idxs)); 7854 7855 /* set preallocation */ 7856 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense)); 7857 if (!newisdense) { 7858 PetscInt *new_local_nnz=NULL; 7859 7860 ptr_idxs = recv_buffer_idxs_local; 7861 if (n_recvs) { 7862 PetscCall(PetscCalloc1(new_local_rows,&new_local_nnz)); 7863 } 7864 for (i=0;i<n_recvs;i++) { 7865 PetscInt j; 7866 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7867 for (j=0;j<*(ptr_idxs+1);j++) { 7868 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7869 } 7870 } else { 7871 /* TODO */ 7872 } 7873 ptr_idxs += olengths_idxs[i]; 7874 } 7875 if (new_local_nnz) { 7876 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7877 PetscCall(MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz)); 7878 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7879 PetscCall(MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7880 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7881 PetscCall(MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7882 } else { 7883 PetscCall(MatSetUp(local_mat)); 7884 } 7885 PetscCall(PetscFree(new_local_nnz)); 7886 } else { 7887 PetscCall(MatSetUp(local_mat)); 7888 } 7889 7890 /* set values */ 7891 ptr_vals = recv_buffer_vals; 7892 ptr_idxs = recv_buffer_idxs_local; 7893 for (i=0;i<n_recvs;i++) { 7894 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7895 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE)); 7896 PetscCall(MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES)); 7897 PetscCall(MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY)); 7898 PetscCall(MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY)); 7899 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE)); 7900 } else { 7901 /* TODO */ 7902 } 7903 ptr_idxs += olengths_idxs[i]; 7904 ptr_vals += olengths_vals[i]; 7905 } 7906 PetscCall(MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY)); 7907 PetscCall(MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY)); 7908 PetscCall(MatISRestoreLocalMat(*mat_n,&local_mat)); 7909 PetscCall(MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY)); 7910 PetscCall(MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY)); 7911 PetscCall(PetscFree(recv_buffer_vals)); 7912 7913 #if 0 7914 if (!restrict_comm) { /* check */ 7915 Vec lvec,rvec; 7916 PetscReal infty_error; 7917 7918 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 7919 PetscCall(VecSetRandom(rvec,NULL)); 7920 PetscCall(MatMult(mat,rvec,lvec)); 7921 PetscCall(VecScale(lvec,-1.0)); 7922 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7923 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7924 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7925 PetscCall(VecDestroy(&rvec)); 7926 PetscCall(VecDestroy(&lvec)); 7927 } 7928 #endif 7929 7930 /* assemble new additional is (if any) */ 7931 if (nis) { 7932 PetscInt **temp_idxs,*count_is,j,psum; 7933 7934 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE)); 7935 PetscCall(PetscCalloc1(nis,&count_is)); 7936 ptr_idxs = recv_buffer_idxs_is; 7937 psum = 0; 7938 for (i=0;i<n_recvs;i++) { 7939 for (j=0;j<nis;j++) { 7940 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7941 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7942 psum += plen; 7943 ptr_idxs += plen+1; /* shift pointer to received data */ 7944 } 7945 } 7946 PetscCall(PetscMalloc1(nis,&temp_idxs)); 7947 PetscCall(PetscMalloc1(psum,&temp_idxs[0])); 7948 for (i=1;i<nis;i++) { 7949 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7950 } 7951 PetscCall(PetscArrayzero(count_is,nis)); 7952 ptr_idxs = recv_buffer_idxs_is; 7953 for (i=0;i<n_recvs;i++) { 7954 for (j=0;j<nis;j++) { 7955 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7956 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen)); 7957 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7958 ptr_idxs += plen+1; /* shift pointer to received data */ 7959 } 7960 } 7961 for (i=0;i<nis;i++) { 7962 PetscCall(ISDestroy(&isarray[i])); 7963 PetscCall(PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i])); 7964 PetscCall(ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i])); 7965 } 7966 PetscCall(PetscFree(count_is)); 7967 PetscCall(PetscFree(temp_idxs[0])); 7968 PetscCall(PetscFree(temp_idxs)); 7969 } 7970 /* free workspace */ 7971 PetscCall(PetscFree(recv_buffer_idxs_is)); 7972 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE)); 7973 PetscCall(PetscFree(send_buffer_idxs)); 7974 PetscCallMPI(MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE)); 7975 if (isdense) { 7976 PetscCall(MatISGetLocalMat(mat,&local_mat)); 7977 PetscCall(MatDenseRestoreArrayRead(local_mat,&send_buffer_vals)); 7978 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 7979 } else { 7980 /* PetscCall(PetscFree(send_buffer_vals)); */ 7981 } 7982 if (nis) { 7983 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE)); 7984 PetscCall(PetscFree(send_buffer_idxs_is)); 7985 } 7986 7987 if (nvecs) { 7988 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE)); 7989 PetscCallMPI(MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE)); 7990 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 7991 PetscCall(VecDestroy(&nnsp_vec[0])); 7992 PetscCall(VecCreate(comm_n,&nnsp_vec[0])); 7993 PetscCall(VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE)); 7994 PetscCall(VecSetType(nnsp_vec[0],VECSTANDARD)); 7995 /* set values */ 7996 ptr_vals = recv_buffer_vecs; 7997 ptr_idxs = recv_buffer_idxs_local; 7998 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 7999 for (i=0;i<n_recvs;i++) { 8000 PetscInt j; 8001 for (j=0;j<*(ptr_idxs+1);j++) { 8002 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8003 } 8004 ptr_idxs += olengths_idxs[i]; 8005 ptr_vals += olengths_idxs[i]-2; 8006 } 8007 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 8008 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8009 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8010 } 8011 8012 PetscCall(PetscFree(recv_buffer_vecs)); 8013 PetscCall(PetscFree(recv_buffer_idxs_local)); 8014 PetscCall(PetscFree(recv_req_idxs)); 8015 PetscCall(PetscFree(recv_req_vals)); 8016 PetscCall(PetscFree(recv_req_vecs)); 8017 PetscCall(PetscFree(recv_req_idxs_is)); 8018 PetscCall(PetscFree(send_req_idxs)); 8019 PetscCall(PetscFree(send_req_vals)); 8020 PetscCall(PetscFree(send_req_vecs)); 8021 PetscCall(PetscFree(send_req_idxs_is)); 8022 PetscCall(PetscFree(ilengths_vals)); 8023 PetscCall(PetscFree(ilengths_idxs)); 8024 PetscCall(PetscFree(olengths_vals)); 8025 PetscCall(PetscFree(olengths_idxs)); 8026 PetscCall(PetscFree(onodes)); 8027 if (nis) { 8028 PetscCall(PetscFree(ilengths_idxs_is)); 8029 PetscCall(PetscFree(olengths_idxs_is)); 8030 PetscCall(PetscFree(onodes_is)); 8031 } 8032 PetscCall(PetscSubcommDestroy(&subcomm)); 8033 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8034 PetscCall(MatDestroy(mat_n)); 8035 for (i=0;i<nis;i++) { 8036 PetscCall(ISDestroy(&isarray[i])); 8037 } 8038 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8039 PetscCall(VecDestroy(&nnsp_vec[0])); 8040 } 8041 *mat_n = NULL; 8042 } 8043 PetscFunctionReturn(0); 8044 } 8045 8046 /* temporary hack into ksp private data structure */ 8047 #include <petsc/private/kspimpl.h> 8048 8049 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8050 { 8051 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8052 PC_IS *pcis = (PC_IS*)pc->data; 8053 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8054 Mat coarsedivudotp = NULL; 8055 Mat coarseG,t_coarse_mat_is; 8056 MatNullSpace CoarseNullSpace = NULL; 8057 ISLocalToGlobalMapping coarse_islg; 8058 IS coarse_is,*isarray,corners; 8059 PetscInt i,im_active=-1,active_procs=-1; 8060 PetscInt nis,nisdofs,nisneu,nisvert; 8061 PetscInt coarse_eqs_per_proc; 8062 PC pc_temp; 8063 PCType coarse_pc_type; 8064 KSPType coarse_ksp_type; 8065 PetscBool multilevel_requested,multilevel_allowed; 8066 PetscBool coarse_reuse; 8067 PetscInt ncoarse,nedcfield; 8068 PetscBool compute_vecs = PETSC_FALSE; 8069 PetscScalar *array; 8070 MatReuse coarse_mat_reuse; 8071 PetscBool restr, full_restr, have_void; 8072 PetscMPIInt size; 8073 8074 PetscFunctionBegin; 8075 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8076 /* Assign global numbering to coarse dofs */ 8077 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8078 PetscInt ocoarse_size; 8079 compute_vecs = PETSC_TRUE; 8080 8081 pcbddc->new_primal_space = PETSC_TRUE; 8082 ocoarse_size = pcbddc->coarse_size; 8083 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8084 PetscCall(PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices)); 8085 /* see if we can avoid some work */ 8086 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8087 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8088 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8089 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8090 coarse_reuse = PETSC_FALSE; 8091 } else { /* we can safely reuse already computed coarse matrix */ 8092 coarse_reuse = PETSC_TRUE; 8093 } 8094 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8095 coarse_reuse = PETSC_FALSE; 8096 } 8097 /* reset any subassembling information */ 8098 if (!coarse_reuse || pcbddc->recompute_topography) { 8099 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8100 } 8101 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8102 coarse_reuse = PETSC_TRUE; 8103 } 8104 if (coarse_reuse && pcbddc->coarse_ksp) { 8105 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 8106 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8107 coarse_mat_reuse = MAT_REUSE_MATRIX; 8108 } else { 8109 coarse_mat = NULL; 8110 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8111 } 8112 8113 /* creates temporary l2gmap and IS for coarse indexes */ 8114 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is)); 8115 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg)); 8116 8117 /* creates temporary MATIS object for coarse matrix */ 8118 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense)); 8119 PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,coarse_islg,&t_coarse_mat_is)); 8120 PetscCall(MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense)); 8121 PetscCall(MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8122 PetscCall(MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8123 PetscCall(MatDestroy(&coarse_submat_dense)); 8124 8125 /* count "active" (i.e. with positive local size) and "void" processes */ 8126 im_active = !!(pcis->n); 8127 PetscCall(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8128 8129 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8130 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8131 /* full_restr : just use the receivers from the subassembling pattern */ 8132 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size)); 8133 coarse_mat_is = NULL; 8134 multilevel_allowed = PETSC_FALSE; 8135 multilevel_requested = PETSC_FALSE; 8136 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8137 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8138 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8139 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8140 if (multilevel_requested) { 8141 ncoarse = active_procs/pcbddc->coarsening_ratio; 8142 restr = PETSC_FALSE; 8143 full_restr = PETSC_FALSE; 8144 } else { 8145 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8146 restr = PETSC_TRUE; 8147 full_restr = PETSC_TRUE; 8148 } 8149 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8150 ncoarse = PetscMax(1,ncoarse); 8151 if (!pcbddc->coarse_subassembling) { 8152 if (pcbddc->coarsening_ratio > 1) { 8153 if (multilevel_requested) { 8154 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8155 } else { 8156 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8157 } 8158 } else { 8159 PetscMPIInt rank; 8160 8161 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank)); 8162 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8163 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling)); 8164 } 8165 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8166 PetscInt psum; 8167 if (pcbddc->coarse_ksp) psum = 1; 8168 else psum = 0; 8169 PetscCall(MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8170 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8171 } 8172 /* determine if we can go multilevel */ 8173 if (multilevel_requested) { 8174 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8175 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8176 } 8177 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8178 8179 /* dump subassembling pattern */ 8180 if (pcbddc->dbg_flag && multilevel_allowed) { 8181 PetscCall(ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer)); 8182 } 8183 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8184 nedcfield = -1; 8185 corners = NULL; 8186 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8187 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8188 const PetscInt *idxs; 8189 ISLocalToGlobalMapping tmap; 8190 8191 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8192 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap)); 8193 /* allocate space for temporary storage */ 8194 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs)); 8195 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs2)); 8196 /* allocate for IS array */ 8197 nisdofs = pcbddc->n_ISForDofsLocal; 8198 if (pcbddc->nedclocal) { 8199 if (pcbddc->nedfield > -1) { 8200 nedcfield = pcbddc->nedfield; 8201 } else { 8202 nedcfield = 0; 8203 PetscCheck(!nisdofs,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%" PetscInt_FMT ")",nisdofs); 8204 nisdofs = 1; 8205 } 8206 } 8207 nisneu = !!pcbddc->NeumannBoundariesLocal; 8208 nisvert = 0; /* nisvert is not used */ 8209 nis = nisdofs + nisneu + nisvert; 8210 PetscCall(PetscMalloc1(nis,&isarray)); 8211 /* dofs splitting */ 8212 for (i=0;i<nisdofs;i++) { 8213 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8214 if (nedcfield != i) { 8215 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize)); 8216 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8217 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8218 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8219 } else { 8220 PetscCall(ISGetLocalSize(pcbddc->nedclocal,&tsize)); 8221 PetscCall(ISGetIndices(pcbddc->nedclocal,&idxs)); 8222 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8223 PetscCheck(tsize == nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT,tsize,nout); 8224 PetscCall(ISRestoreIndices(pcbddc->nedclocal,&idxs)); 8225 } 8226 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8227 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i])); 8228 /* PetscCall(ISView(isarray[i],0)); */ 8229 } 8230 /* neumann boundaries */ 8231 if (pcbddc->NeumannBoundariesLocal) { 8232 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8233 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize)); 8234 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8235 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8236 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8237 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8238 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs])); 8239 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8240 } 8241 /* coordinates */ 8242 if (pcbddc->corner_selected) { 8243 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8244 PetscCall(ISGetLocalSize(corners,&tsize)); 8245 PetscCall(ISGetIndices(corners,&idxs)); 8246 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8247 PetscCheck(tsize == nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT,tsize,nout); 8248 PetscCall(ISRestoreIndices(corners,&idxs)); 8249 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8250 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8251 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners)); 8252 } 8253 PetscCall(PetscFree(tidxs)); 8254 PetscCall(PetscFree(tidxs2)); 8255 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8256 } else { 8257 nis = 0; 8258 nisdofs = 0; 8259 nisneu = 0; 8260 nisvert = 0; 8261 isarray = NULL; 8262 } 8263 /* destroy no longer needed map */ 8264 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8265 8266 /* subassemble */ 8267 if (multilevel_allowed) { 8268 Vec vp[1]; 8269 PetscInt nvecs = 0; 8270 PetscBool reuse,reuser; 8271 8272 if (coarse_mat) reuse = PETSC_TRUE; 8273 else reuse = PETSC_FALSE; 8274 PetscCall(MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8275 vp[0] = NULL; 8276 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8277 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&vp[0])); 8278 PetscCall(VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE)); 8279 PetscCall(VecSetType(vp[0],VECSTANDARD)); 8280 nvecs = 1; 8281 8282 if (pcbddc->divudotp) { 8283 Mat B,loc_divudotp; 8284 Vec v,p; 8285 IS dummy; 8286 PetscInt np; 8287 8288 PetscCall(MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp)); 8289 PetscCall(MatGetSize(loc_divudotp,&np,NULL)); 8290 PetscCall(ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy)); 8291 PetscCall(MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B)); 8292 PetscCall(MatCreateVecs(B,&v,&p)); 8293 PetscCall(VecSet(p,1.)); 8294 PetscCall(MatMultTranspose(B,p,v)); 8295 PetscCall(VecDestroy(&p)); 8296 PetscCall(MatDestroy(&B)); 8297 PetscCall(VecGetArray(vp[0],&array)); 8298 PetscCall(VecPlaceArray(pcbddc->vec1_P,array)); 8299 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P)); 8300 PetscCall(VecResetArray(pcbddc->vec1_P)); 8301 PetscCall(VecRestoreArray(vp[0],&array)); 8302 PetscCall(ISDestroy(&dummy)); 8303 PetscCall(VecDestroy(&v)); 8304 } 8305 } 8306 if (reuser) { 8307 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp)); 8308 } else { 8309 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp)); 8310 } 8311 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8312 PetscScalar *arraym; 8313 const PetscScalar *arrayv; 8314 PetscInt nl; 8315 PetscCall(VecGetLocalSize(vp[0],&nl)); 8316 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp)); 8317 PetscCall(MatDenseGetArray(coarsedivudotp,&arraym)); 8318 PetscCall(VecGetArrayRead(vp[0],&arrayv)); 8319 PetscCall(PetscArraycpy(arraym,arrayv,nl)); 8320 PetscCall(VecRestoreArrayRead(vp[0],&arrayv)); 8321 PetscCall(MatDenseRestoreArray(coarsedivudotp,&arraym)); 8322 PetscCall(VecDestroy(&vp[0])); 8323 } else { 8324 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp)); 8325 } 8326 } else { 8327 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL)); 8328 } 8329 if (coarse_mat_is || coarse_mat) { 8330 if (!multilevel_allowed) { 8331 PetscCall(MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat)); 8332 } else { 8333 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8334 if (coarse_mat_is) { 8335 PetscCheck(!coarse_mat,PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8336 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8337 coarse_mat = coarse_mat_is; 8338 } 8339 } 8340 } 8341 PetscCall(MatDestroy(&t_coarse_mat_is)); 8342 PetscCall(MatDestroy(&coarse_mat_is)); 8343 8344 /* create local to global scatters for coarse problem */ 8345 if (compute_vecs) { 8346 PetscInt lrows; 8347 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8348 if (coarse_mat) { 8349 PetscCall(MatGetLocalSize(coarse_mat,&lrows,NULL)); 8350 } else { 8351 lrows = 0; 8352 } 8353 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec)); 8354 PetscCall(VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE)); 8355 PetscCall(VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8356 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8357 PetscCall(VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob)); 8358 } 8359 PetscCall(ISDestroy(&coarse_is)); 8360 8361 /* set defaults for coarse KSP and PC */ 8362 if (multilevel_allowed) { 8363 coarse_ksp_type = KSPRICHARDSON; 8364 coarse_pc_type = PCBDDC; 8365 } else { 8366 coarse_ksp_type = KSPPREONLY; 8367 coarse_pc_type = PCREDUNDANT; 8368 } 8369 8370 /* print some info if requested */ 8371 if (pcbddc->dbg_flag) { 8372 if (!multilevel_allowed) { 8373 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8374 if (multilevel_requested) { 8375 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio)); 8376 } else if (pcbddc->max_levels) { 8377 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%" PetscInt_FMT ")\n",pcbddc->max_levels)); 8378 } 8379 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8380 } 8381 } 8382 8383 /* communicate coarse discrete gradient */ 8384 coarseG = NULL; 8385 if (pcbddc->nedcG && multilevel_allowed) { 8386 MPI_Comm ccomm; 8387 if (coarse_mat) { 8388 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8389 } else { 8390 ccomm = MPI_COMM_NULL; 8391 } 8392 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG)); 8393 } 8394 8395 /* create the coarse KSP object only once with defaults */ 8396 if (coarse_mat) { 8397 PetscBool isredundant,isbddc,force,valid; 8398 PetscViewer dbg_viewer = NULL; 8399 PetscBool isset,issym,isher,isspd; 8400 8401 if (pcbddc->dbg_flag) { 8402 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8403 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level)); 8404 } 8405 if (!pcbddc->coarse_ksp) { 8406 char prefix[256],str_level[16]; 8407 size_t len; 8408 8409 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp)); 8410 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure)); 8411 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1)); 8412 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1)); 8413 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8414 PetscCall(KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type)); 8415 PetscCall(KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE)); 8416 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8417 /* TODO is this logic correct? should check for coarse_mat type */ 8418 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8419 /* prefix */ 8420 PetscCall(PetscStrcpy(prefix,"")); 8421 PetscCall(PetscStrcpy(str_level,"")); 8422 if (!pcbddc->current_level) { 8423 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix))); 8424 PetscCall(PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix))); 8425 } else { 8426 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 8427 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8428 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8429 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8430 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1)); 8431 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 8432 PetscCall(PetscStrlcat(prefix,str_level,sizeof(prefix))); 8433 } 8434 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix)); 8435 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8436 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8437 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8438 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8439 /* allow user customization */ 8440 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8441 /* get some info after set from options */ 8442 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8443 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8444 force = PETSC_FALSE; 8445 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8446 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8447 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8448 if (multilevel_allowed && !force && !valid) { 8449 isbddc = PETSC_TRUE; 8450 PetscCall(PCSetType(pc_temp,PCBDDC)); 8451 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8452 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8453 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8454 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8455 PetscObjectOptionsBegin((PetscObject)pc_temp); 8456 PetscCall((*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp)); 8457 PetscCall(PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp)); 8458 PetscOptionsEnd(); 8459 pc_temp->setfromoptionscalled++; 8460 } 8461 } 8462 } 8463 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8464 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8465 if (nisdofs) { 8466 PetscCall(PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray)); 8467 for (i=0;i<nisdofs;i++) { 8468 PetscCall(ISDestroy(&isarray[i])); 8469 } 8470 } 8471 if (nisneu) { 8472 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs])); 8473 PetscCall(ISDestroy(&isarray[nisdofs])); 8474 } 8475 if (nisvert) { 8476 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1])); 8477 PetscCall(ISDestroy(&isarray[nis-1])); 8478 } 8479 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE)); 8480 8481 /* get some info after set from options */ 8482 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8483 8484 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8485 if (isbddc && !multilevel_allowed) { 8486 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8487 } 8488 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8489 force = PETSC_FALSE; 8490 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8491 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8492 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8493 PetscCall(PCSetType(pc_temp,PCBDDC)); 8494 } 8495 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant)); 8496 if (isredundant) { 8497 KSP inner_ksp; 8498 PC inner_pc; 8499 8500 PetscCall(PCRedundantGetKSP(pc_temp,&inner_ksp)); 8501 PetscCall(KSPGetPC(inner_ksp,&inner_pc)); 8502 } 8503 8504 /* parameters which miss an API */ 8505 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8506 if (isbddc) { 8507 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8508 8509 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8510 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8511 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8512 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8513 if (pcbddc_coarse->benign_saddle_point) { 8514 Mat coarsedivudotp_is; 8515 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8516 IS row,col; 8517 const PetscInt *gidxs; 8518 PetscInt n,st,M,N; 8519 8520 PetscCall(MatGetSize(coarsedivudotp,&n,NULL)); 8521 PetscCallMPI(MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat))); 8522 st = st-n; 8523 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row)); 8524 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL)); 8525 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap,&n)); 8526 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 8527 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col)); 8528 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 8529 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 8530 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 8531 PetscCall(ISGetSize(row,&M)); 8532 PetscCall(MatGetSize(coarse_mat,&N,NULL)); 8533 PetscCall(ISDestroy(&row)); 8534 PetscCall(ISDestroy(&col)); 8535 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is)); 8536 PetscCall(MatSetType(coarsedivudotp_is,MATIS)); 8537 PetscCall(MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N)); 8538 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g)); 8539 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8540 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8541 PetscCall(MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp)); 8542 PetscCall(MatDestroy(&coarsedivudotp)); 8543 PetscCall(PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL)); 8544 PetscCall(MatDestroy(&coarsedivudotp_is)); 8545 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8546 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8547 } 8548 } 8549 8550 /* propagate symmetry info of coarse matrix */ 8551 PetscCall(MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE)); 8552 PetscCall(MatIsSymmetricKnown(pc->pmat,&isset,&issym)); 8553 if (isset) PetscCall(MatSetOption(coarse_mat,MAT_SYMMETRIC,issym)); 8554 PetscCall(MatIsHermitianKnown(pc->pmat,&isset,&isher)); 8555 if (isset) PetscCall(MatSetOption(coarse_mat,MAT_HERMITIAN,isher)); 8556 PetscCall(MatIsSPDKnown(pc->pmat,&isset,&isspd)); 8557 if (isset) PetscCall(MatSetOption(coarse_mat,MAT_SPD,isspd)); 8558 8559 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8560 PetscCall(MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE)); 8561 } 8562 /* set operators */ 8563 PetscCall(MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view")); 8564 PetscCall(MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix)); 8565 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8566 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level)); 8567 } 8568 PetscCall(MatDestroy(&coarseG)); 8569 PetscCall(PetscFree(isarray)); 8570 #if 0 8571 { 8572 PetscViewer viewer; 8573 char filename[256]; 8574 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8575 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8576 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8577 PetscCall(MatView(coarse_mat,viewer)); 8578 PetscCall(PetscViewerPopFormat(viewer)); 8579 PetscCall(PetscViewerDestroy(&viewer)); 8580 } 8581 #endif 8582 8583 if (corners) { 8584 Vec gv; 8585 IS is; 8586 const PetscInt *idxs; 8587 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8588 PetscScalar *coords; 8589 8590 PetscCheck(pcbddc->mat_graph->cloc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8591 PetscCall(VecGetSize(pcbddc->coarse_vec,&N)); 8592 PetscCall(VecGetLocalSize(pcbddc->coarse_vec,&n)); 8593 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv)); 8594 PetscCall(VecSetBlockSize(gv,cdim)); 8595 PetscCall(VecSetSizes(gv,n*cdim,N*cdim)); 8596 PetscCall(VecSetType(gv,VECSTANDARD)); 8597 PetscCall(VecSetFromOptions(gv)); 8598 PetscCall(VecSet(gv,PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8599 8600 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8601 PetscCall(ISGetLocalSize(is,&n)); 8602 PetscCall(ISGetIndices(is,&idxs)); 8603 PetscCall(PetscMalloc1(n*cdim,&coords)); 8604 for (i=0;i<n;i++) { 8605 for (d=0;d<cdim;d++) { 8606 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8607 } 8608 } 8609 PetscCall(ISRestoreIndices(is,&idxs)); 8610 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8611 8612 PetscCall(ISGetLocalSize(corners,&n)); 8613 PetscCall(ISGetIndices(corners,&idxs)); 8614 PetscCall(VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES)); 8615 PetscCall(ISRestoreIndices(corners,&idxs)); 8616 PetscCall(PetscFree(coords)); 8617 PetscCall(VecAssemblyBegin(gv)); 8618 PetscCall(VecAssemblyEnd(gv)); 8619 PetscCall(VecGetArray(gv,&coords)); 8620 if (pcbddc->coarse_ksp) { 8621 PC coarse_pc; 8622 PetscBool isbddc; 8623 8624 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 8625 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 8626 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8627 PetscReal *realcoords; 8628 8629 PetscCall(VecGetLocalSize(gv,&n)); 8630 #if defined(PETSC_USE_COMPLEX) 8631 PetscCall(PetscMalloc1(n,&realcoords)); 8632 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8633 #else 8634 realcoords = coords; 8635 #endif 8636 PetscCall(PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords)); 8637 #if defined(PETSC_USE_COMPLEX) 8638 PetscCall(PetscFree(realcoords)); 8639 #endif 8640 } 8641 } 8642 PetscCall(VecRestoreArray(gv,&coords)); 8643 PetscCall(VecDestroy(&gv)); 8644 } 8645 PetscCall(ISDestroy(&corners)); 8646 8647 if (pcbddc->coarse_ksp) { 8648 Vec crhs,csol; 8649 8650 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&csol)); 8651 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&crhs)); 8652 if (!csol) { 8653 PetscCall(MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL)); 8654 } 8655 if (!crhs) { 8656 PetscCall(MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs))); 8657 } 8658 } 8659 PetscCall(MatDestroy(&coarsedivudotp)); 8660 8661 /* compute null space for coarse solver if the benign trick has been requested */ 8662 if (pcbddc->benign_null) { 8663 8664 PetscCall(VecSet(pcbddc->vec1_P,0.)); 8665 for (i=0;i<pcbddc->benign_n;i++) { 8666 PetscCall(VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES)); 8667 } 8668 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8669 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8670 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8671 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8672 if (coarse_mat) { 8673 Vec nullv; 8674 PetscScalar *array,*array2; 8675 PetscInt nl; 8676 8677 PetscCall(MatCreateVecs(coarse_mat,&nullv,NULL)); 8678 PetscCall(VecGetLocalSize(nullv,&nl)); 8679 PetscCall(VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8680 PetscCall(VecGetArray(nullv,&array2)); 8681 PetscCall(PetscArraycpy(array2,array,nl)); 8682 PetscCall(VecRestoreArray(nullv,&array2)); 8683 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8684 PetscCall(VecNormalize(nullv,NULL)); 8685 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace)); 8686 PetscCall(VecDestroy(&nullv)); 8687 } 8688 } 8689 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8690 8691 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8692 if (pcbddc->coarse_ksp) { 8693 PetscBool ispreonly; 8694 8695 if (CoarseNullSpace) { 8696 PetscBool isnull; 8697 8698 PetscCall(MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull)); 8699 if (isnull) PetscCall(MatSetNullSpace(coarse_mat,CoarseNullSpace)); 8700 /* TODO: add local nullspaces (if any) */ 8701 } 8702 /* setup coarse ksp */ 8703 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8704 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8705 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly)); 8706 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8707 KSP check_ksp; 8708 KSPType check_ksp_type; 8709 PC check_pc; 8710 Vec check_vec,coarse_vec; 8711 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8712 PetscInt its; 8713 PetscBool compute_eigs; 8714 PetscReal *eigs_r,*eigs_c; 8715 PetscInt neigs; 8716 const char *prefix; 8717 8718 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8719 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp)); 8720 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0)); 8721 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE)); 8722 PetscCall(KSPSetOperators(check_ksp,coarse_mat,coarse_mat)); 8723 PetscCall(KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size)); 8724 /* prevent from setup unneeded object */ 8725 PetscCall(KSPGetPC(check_ksp,&check_pc)); 8726 PetscCall(PCSetType(check_pc,PCNONE)); 8727 if (ispreonly) { 8728 check_ksp_type = KSPPREONLY; 8729 compute_eigs = PETSC_FALSE; 8730 } else { 8731 check_ksp_type = KSPGMRES; 8732 compute_eigs = PETSC_TRUE; 8733 } 8734 PetscCall(KSPSetType(check_ksp,check_ksp_type)); 8735 PetscCall(KSPSetComputeSingularValues(check_ksp,compute_eigs)); 8736 PetscCall(KSPSetComputeEigenvalues(check_ksp,compute_eigs)); 8737 PetscCall(KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1)); 8738 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix)); 8739 PetscCall(KSPSetOptionsPrefix(check_ksp,prefix)); 8740 PetscCall(KSPAppendOptionsPrefix(check_ksp,"check_")); 8741 PetscCall(KSPSetFromOptions(check_ksp)); 8742 PetscCall(KSPSetUp(check_ksp)); 8743 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&check_pc)); 8744 PetscCall(KSPSetPC(check_ksp,check_pc)); 8745 /* create random vec */ 8746 PetscCall(MatCreateVecs(coarse_mat,&coarse_vec,&check_vec)); 8747 PetscCall(VecSetRandom(check_vec,NULL)); 8748 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8749 /* solve coarse problem */ 8750 PetscCall(KSPSolve(check_ksp,coarse_vec,coarse_vec)); 8751 PetscCall(KSPCheckSolve(check_ksp,pc,coarse_vec)); 8752 /* set eigenvalue estimation if preonly has not been requested */ 8753 if (compute_eigs) { 8754 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_r)); 8755 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_c)); 8756 PetscCall(KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs)); 8757 if (neigs) { 8758 lambda_max = eigs_r[neigs-1]; 8759 lambda_min = eigs_r[0]; 8760 if (pcbddc->use_coarse_estimates) { 8761 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8762 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min)); 8763 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min))); 8764 } 8765 } 8766 } 8767 } 8768 8769 /* check coarse problem residual error */ 8770 if (pcbddc->dbg_flag) { 8771 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8772 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1))); 8773 PetscCall(VecAXPY(check_vec,-1.0,coarse_vec)); 8774 PetscCall(VecNorm(check_vec,NORM_INFINITY,&infty_error)); 8775 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8776 PetscCall(VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error)); 8777 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates)); 8778 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer)); 8779 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer)); 8780 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",(double)infty_error)); 8781 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",(double)abs_infty_error)); 8782 if (CoarseNullSpace) { 8783 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n")); 8784 } 8785 if (compute_eigs) { 8786 PetscReal lambda_max_s,lambda_min_s; 8787 KSPConvergedReason reason; 8788 PetscCall(KSPGetType(check_ksp,&check_ksp_type)); 8789 PetscCall(KSPGetIterationNumber(check_ksp,&its)); 8790 PetscCall(KSPGetConvergedReason(check_ksp,&reason)); 8791 PetscCall(KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s)); 8792 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,(double)lambda_min,(double)lambda_max,(double)lambda_min_s,(double)lambda_max_s)); 8793 for (i=0;i<neigs;i++) { 8794 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",(double)eigs_r[i],(double)eigs_c[i])); 8795 } 8796 } 8797 PetscCall(PetscViewerFlush(dbg_viewer)); 8798 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1))); 8799 } 8800 PetscCall(VecDestroy(&check_vec)); 8801 PetscCall(VecDestroy(&coarse_vec)); 8802 PetscCall(KSPDestroy(&check_ksp)); 8803 if (compute_eigs) { 8804 PetscCall(PetscFree(eigs_r)); 8805 PetscCall(PetscFree(eigs_c)); 8806 } 8807 } 8808 } 8809 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 8810 /* print additional info */ 8811 if (pcbddc->dbg_flag) { 8812 /* waits until all processes reaches this point */ 8813 PetscCall(PetscBarrier((PetscObject)pc)); 8814 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %" PetscInt_FMT "\n",pcbddc->current_level)); 8815 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8816 } 8817 8818 /* free memory */ 8819 PetscCall(MatDestroy(&coarse_mat)); 8820 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8821 PetscFunctionReturn(0); 8822 } 8823 8824 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8825 { 8826 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8827 PC_IS* pcis = (PC_IS*)pc->data; 8828 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8829 IS subset,subset_mult,subset_n; 8830 PetscInt local_size,coarse_size=0; 8831 PetscInt *local_primal_indices=NULL; 8832 const PetscInt *t_local_primal_indices; 8833 8834 PetscFunctionBegin; 8835 /* Compute global number of coarse dofs */ 8836 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8837 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n)); 8838 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset)); 8839 PetscCall(ISDestroy(&subset_n)); 8840 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult)); 8841 PetscCall(ISRenumber(subset,subset_mult,&coarse_size,&subset_n)); 8842 PetscCall(ISDestroy(&subset)); 8843 PetscCall(ISDestroy(&subset_mult)); 8844 PetscCall(ISGetLocalSize(subset_n,&local_size)); 8845 PetscCheck(local_size == pcbddc->local_primal_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT,local_size,pcbddc->local_primal_size); 8846 PetscCall(PetscMalloc1(local_size,&local_primal_indices)); 8847 PetscCall(ISGetIndices(subset_n,&t_local_primal_indices)); 8848 PetscCall(PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size)); 8849 PetscCall(ISRestoreIndices(subset_n,&t_local_primal_indices)); 8850 PetscCall(ISDestroy(&subset_n)); 8851 8852 /* check numbering */ 8853 if (pcbddc->dbg_flag) { 8854 PetscScalar coarsesum,*array,*array2; 8855 PetscInt i; 8856 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8857 8858 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8859 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8860 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n")); 8861 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8862 /* counter */ 8863 PetscCall(VecSet(pcis->vec1_global,0.0)); 8864 PetscCall(VecSet(pcis->vec1_N,1.0)); 8865 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8866 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8867 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8868 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8869 PetscCall(VecSet(pcis->vec1_N,0.0)); 8870 for (i=0;i<pcbddc->local_primal_size;i++) { 8871 PetscCall(VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES)); 8872 } 8873 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8874 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8875 PetscCall(VecSet(pcis->vec1_global,0.0)); 8876 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8877 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8878 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8879 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8880 PetscCall(VecGetArray(pcis->vec1_N,&array)); 8881 PetscCall(VecGetArray(pcis->vec2_N,&array2)); 8882 for (i=0;i<pcis->n;i++) { 8883 if (array[i] != 0.0 && array[i] != array2[i]) { 8884 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8885 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8886 set_error = PETSC_TRUE; 8887 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi)); 8888 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n",PetscGlobalRank,i,gi,owned,neigh)); 8889 } 8890 } 8891 PetscCall(VecRestoreArray(pcis->vec2_N,&array2)); 8892 PetscCall(MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8893 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8894 for (i=0;i<pcis->n;i++) { 8895 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8896 } 8897 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 8898 PetscCall(VecSet(pcis->vec1_global,0.0)); 8899 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8900 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8901 PetscCall(VecSum(pcis->vec1_global,&coarsesum)); 8902 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %" PetscInt_FMT " (%lf)\n",coarse_size,(double)PetscRealPart(coarsesum))); 8903 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8904 PetscInt *gidxs; 8905 8906 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&gidxs)); 8907 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs)); 8908 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n")); 8909 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8910 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank)); 8911 for (i=0;i<pcbddc->local_primal_size;i++) { 8912 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i])); 8913 } 8914 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8915 PetscCall(PetscFree(gidxs)); 8916 } 8917 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8918 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8919 PetscCheck(!set_error_reduced,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8920 } 8921 8922 /* get back data */ 8923 *coarse_size_n = coarse_size; 8924 *local_primal_indices_n = local_primal_indices; 8925 PetscFunctionReturn(0); 8926 } 8927 8928 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8929 { 8930 IS localis_t; 8931 PetscInt i,lsize,*idxs,n; 8932 PetscScalar *vals; 8933 8934 PetscFunctionBegin; 8935 /* get indices in local ordering exploiting local to global map */ 8936 PetscCall(ISGetLocalSize(globalis,&lsize)); 8937 PetscCall(PetscMalloc1(lsize,&vals)); 8938 for (i=0;i<lsize;i++) vals[i] = 1.0; 8939 PetscCall(ISGetIndices(globalis,(const PetscInt**)&idxs)); 8940 PetscCall(VecSet(gwork,0.0)); 8941 PetscCall(VecSet(lwork,0.0)); 8942 if (idxs) { /* multilevel guard */ 8943 PetscCall(VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE)); 8944 PetscCall(VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES)); 8945 } 8946 PetscCall(VecAssemblyBegin(gwork)); 8947 PetscCall(ISRestoreIndices(globalis,(const PetscInt**)&idxs)); 8948 PetscCall(PetscFree(vals)); 8949 PetscCall(VecAssemblyEnd(gwork)); 8950 /* now compute set in local ordering */ 8951 PetscCall(VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 8952 PetscCall(VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 8953 PetscCall(VecGetArrayRead(lwork,(const PetscScalar**)&vals)); 8954 PetscCall(VecGetSize(lwork,&n)); 8955 for (i=0,lsize=0;i<n;i++) { 8956 if (PetscRealPart(vals[i]) > 0.5) { 8957 lsize++; 8958 } 8959 } 8960 PetscCall(PetscMalloc1(lsize,&idxs)); 8961 for (i=0,lsize=0;i<n;i++) { 8962 if (PetscRealPart(vals[i]) > 0.5) { 8963 idxs[lsize++] = i; 8964 } 8965 } 8966 PetscCall(VecRestoreArrayRead(lwork,(const PetscScalar**)&vals)); 8967 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t)); 8968 *localis = localis_t; 8969 PetscFunctionReturn(0); 8970 } 8971 8972 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 8973 { 8974 PC_IS *pcis = (PC_IS*)pc->data; 8975 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8976 PC_IS *pcisf; 8977 PC_BDDC *pcbddcf; 8978 PC pcf; 8979 8980 PetscFunctionBegin; 8981 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc),&pcf)); 8982 PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)pcf)); 8983 PetscCall(PCSetOperators(pcf,pc->mat,pc->pmat)); 8984 PetscCall(PCSetType(pcf,PCBDDC)); 8985 8986 pcisf = (PC_IS*)pcf->data; 8987 pcbddcf = (PC_BDDC*)pcf->data; 8988 8989 pcisf->is_B_local = pcis->is_B_local; 8990 pcisf->vec1_N = pcis->vec1_N; 8991 pcisf->BtoNmap = pcis->BtoNmap; 8992 pcisf->n = pcis->n; 8993 pcisf->n_B = pcis->n_B; 8994 8995 PetscCall(PetscFree(pcbddcf->mat_graph)); 8996 PetscCall(PetscFree(pcbddcf->sub_schurs)); 8997 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 8998 pcbddcf->sub_schurs = schurs; 8999 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 9000 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 9001 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 9002 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 9003 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 9004 pcbddcf->use_faces = PETSC_TRUE; 9005 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 9006 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 9007 pcbddcf->use_qr_single = (PetscBool)!constraints; 9008 pcbddcf->fake_change = PETSC_TRUE; 9009 pcbddcf->dbg_flag = pcbddc->dbg_flag; 9010 9011 PetscCall(PCBDDCAdaptiveSelection(pcf)); 9012 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9013 9014 *change = pcbddcf->ConstraintMatrix; 9015 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat),pcbddcf->local_primal_size_cc,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,change_primal)); 9016 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat),pcbddcf->local_primal_size_cc,pcbddcf->local_primal_ref_mult,PETSC_COPY_VALUES,change_primal_mult)); 9017 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 9018 9019 if (schurs) pcbddcf->sub_schurs = NULL; 9020 pcbddcf->ConstraintMatrix = NULL; 9021 pcbddcf->mat_graph = NULL; 9022 pcisf->is_B_local = NULL; 9023 pcisf->vec1_N = NULL; 9024 pcisf->BtoNmap = NULL; 9025 PetscCall(PCDestroy(&pcf)); 9026 PetscFunctionReturn(0); 9027 } 9028 9029 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9030 { 9031 PC_IS *pcis=(PC_IS*)pc->data; 9032 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9033 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9034 Mat S_j; 9035 PetscInt *used_xadj,*used_adjncy; 9036 PetscBool free_used_adj; 9037 9038 PetscFunctionBegin; 9039 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9040 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9041 free_used_adj = PETSC_FALSE; 9042 if (pcbddc->sub_schurs_layers == -1) { 9043 used_xadj = NULL; 9044 used_adjncy = NULL; 9045 } else { 9046 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9047 used_xadj = pcbddc->mat_graph->xadj; 9048 used_adjncy = pcbddc->mat_graph->adjncy; 9049 } else if (pcbddc->computed_rowadj) { 9050 used_xadj = pcbddc->mat_graph->xadj; 9051 used_adjncy = pcbddc->mat_graph->adjncy; 9052 } else { 9053 PetscBool flg_row=PETSC_FALSE; 9054 const PetscInt *xadj,*adjncy; 9055 PetscInt nvtxs; 9056 9057 PetscCall(MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9058 if (flg_row) { 9059 PetscCall(PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy)); 9060 PetscCall(PetscArraycpy(used_xadj,xadj,nvtxs+1)); 9061 PetscCall(PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs])); 9062 free_used_adj = PETSC_TRUE; 9063 } else { 9064 pcbddc->sub_schurs_layers = -1; 9065 used_xadj = NULL; 9066 used_adjncy = NULL; 9067 } 9068 PetscCall(MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9069 } 9070 } 9071 9072 /* setup sub_schurs data */ 9073 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9074 if (!sub_schurs->schur_explicit) { 9075 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9076 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9077 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL)); 9078 } else { 9079 Mat change = NULL; 9080 Vec scaling = NULL; 9081 IS change_primal = NULL, iP; 9082 PetscInt benign_n; 9083 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9084 PetscBool need_change = PETSC_FALSE; 9085 PetscBool discrete_harmonic = PETSC_FALSE; 9086 9087 if (!pcbddc->use_vertices && reuse_solvers) { 9088 PetscInt n_vertices; 9089 9090 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 9091 reuse_solvers = (PetscBool)!n_vertices; 9092 } 9093 if (!pcbddc->benign_change_explicit) { 9094 benign_n = pcbddc->benign_n; 9095 } else { 9096 benign_n = 0; 9097 } 9098 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9099 We need a global reduction to avoid possible deadlocks. 9100 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9101 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9102 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9103 PetscCall(MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 9104 need_change = (PetscBool)(!need_change); 9105 } 9106 /* If the user defines additional constraints, we import them here */ 9107 if (need_change) { 9108 PetscCheck(!pcbddc->sub_schurs_rebuild,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9109 PetscCall(PCBDDCComputeFakeChange(pc,PETSC_FALSE,NULL,NULL,&change,&change_primal,NULL,&sub_schurs->change_with_qr)); 9110 9111 } 9112 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9113 9114 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP)); 9115 if (iP) { 9116 PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC"); 9117 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL)); 9118 PetscOptionsEnd(); 9119 } 9120 if (discrete_harmonic) { 9121 Mat A; 9122 PetscCall(MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A)); 9123 PetscCall(MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL)); 9124 PetscCall(PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP)); 9125 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9126 PetscCall(MatDestroy(&A)); 9127 } else { 9128 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9129 } 9130 PetscCall(MatDestroy(&change)); 9131 PetscCall(ISDestroy(&change_primal)); 9132 } 9133 PetscCall(MatDestroy(&S_j)); 9134 9135 /* free adjacency */ 9136 if (free_used_adj) PetscCall(PetscFree2(used_xadj,used_adjncy)); 9137 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9138 PetscFunctionReturn(0); 9139 } 9140 9141 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9142 { 9143 PC_IS *pcis=(PC_IS*)pc->data; 9144 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9145 PCBDDCGraph graph; 9146 9147 PetscFunctionBegin; 9148 /* attach interface graph for determining subsets */ 9149 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9150 IS verticesIS,verticescomm; 9151 PetscInt vsize,*idxs; 9152 9153 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9154 PetscCall(ISGetSize(verticesIS,&vsize)); 9155 PetscCall(ISGetIndices(verticesIS,(const PetscInt**)&idxs)); 9156 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm)); 9157 PetscCall(ISRestoreIndices(verticesIS,(const PetscInt**)&idxs)); 9158 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9159 PetscCall(PCBDDCGraphCreate(&graph)); 9160 PetscCall(PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount)); 9161 PetscCall(PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm)); 9162 PetscCall(ISDestroy(&verticescomm)); 9163 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9164 } else { 9165 graph = pcbddc->mat_graph; 9166 } 9167 /* print some info */ 9168 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9169 IS vertices; 9170 PetscInt nv,nedges,nfaces; 9171 PetscCall(PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 9172 PetscCall(PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9173 PetscCall(ISGetSize(vertices,&nv)); 9174 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9175 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 9176 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices)); 9177 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges)); 9178 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces)); 9179 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9180 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9181 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9182 } 9183 9184 /* sub_schurs init */ 9185 if (!pcbddc->sub_schurs) { 9186 PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9187 } 9188 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild,PETSC_FALSE)); 9189 9190 /* free graph struct */ 9191 if (pcbddc->sub_schurs_rebuild) { 9192 PetscCall(PCBDDCGraphDestroy(&graph)); 9193 } 9194 PetscFunctionReturn(0); 9195 } 9196 9197 PetscErrorCode PCBDDCCheckOperator(PC pc) 9198 { 9199 PC_IS *pcis=(PC_IS*)pc->data; 9200 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9201 9202 PetscFunctionBegin; 9203 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9204 IS zerodiag = NULL; 9205 Mat S_j,B0_B=NULL; 9206 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9207 PetscScalar *p0_check,*array,*array2; 9208 PetscReal norm; 9209 PetscInt i; 9210 9211 /* B0 and B0_B */ 9212 if (zerodiag) { 9213 IS dummy; 9214 9215 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy)); 9216 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 9217 PetscCall(MatCreateVecs(B0_B,NULL,&dummy_vec)); 9218 PetscCall(ISDestroy(&dummy)); 9219 } 9220 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9221 PetscCall(VecDuplicate(pcbddc->vec1_P,&vec_scale_P)); 9222 PetscCall(VecSet(pcbddc->vec1_P,1.0)); 9223 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9224 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9225 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9226 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9227 PetscCall(VecReciprocal(vec_scale_P)); 9228 /* S_j */ 9229 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9230 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9231 9232 /* mimic vector in \widetilde{W}_\Gamma */ 9233 PetscCall(VecSetRandom(pcis->vec1_N,NULL)); 9234 /* continuous in primal space */ 9235 PetscCall(VecSetRandom(pcbddc->coarse_vec,NULL)); 9236 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9237 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9238 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9239 PetscCall(PetscCalloc1(pcbddc->benign_n,&p0_check)); 9240 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9241 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9242 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9243 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9244 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9245 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9246 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9247 PetscCall(VecDuplicate(pcis->vec2_B,&vec_check_B)); 9248 PetscCall(VecCopy(pcis->vec2_B,vec_check_B)); 9249 9250 /* assemble rhs for coarse problem */ 9251 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9252 /* local with Schur */ 9253 PetscCall(MatMult(S_j,pcis->vec2_B,pcis->vec1_B)); 9254 if (zerodiag) { 9255 PetscCall(VecGetArray(dummy_vec,&array)); 9256 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9257 PetscCall(VecRestoreArray(dummy_vec,&array)); 9258 PetscCall(MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B)); 9259 } 9260 /* sum on primal nodes the local contributions */ 9261 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9262 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9263 PetscCall(VecGetArray(pcis->vec1_N,&array)); 9264 PetscCall(VecGetArray(pcbddc->vec1_P,&array2)); 9265 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9266 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array2)); 9267 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 9268 PetscCall(VecSet(pcbddc->coarse_vec,0.)); 9269 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9270 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9271 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9272 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9273 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9274 /* scale primal nodes (BDDC sums contibutions) */ 9275 PetscCall(VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P)); 9276 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9277 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9278 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9279 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9280 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9281 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9282 /* global: \widetilde{B0}_B w_\Gamma */ 9283 if (zerodiag) { 9284 PetscCall(MatMult(B0_B,pcis->vec2_B,dummy_vec)); 9285 PetscCall(VecGetArray(dummy_vec,&array)); 9286 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9287 PetscCall(VecRestoreArray(dummy_vec,&array)); 9288 } 9289 /* BDDC */ 9290 PetscCall(VecSet(pcis->vec1_D,0.)); 9291 PetscCall(PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE)); 9292 9293 PetscCall(VecCopy(pcis->vec1_B,pcis->vec2_B)); 9294 PetscCall(VecAXPY(pcis->vec1_B,-1.0,vec_check_B)); 9295 PetscCall(VecNorm(pcis->vec1_B,NORM_INFINITY,&norm)); 9296 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,(double)norm)); 9297 for (i=0;i<pcbddc->benign_n;i++) { 9298 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%" PetscInt_FMT "] error is %1.4e\n",PetscGlobalRank,i,(double)PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]))); 9299 } 9300 PetscCall(PetscFree(p0_check)); 9301 PetscCall(VecDestroy(&vec_scale_P)); 9302 PetscCall(VecDestroy(&vec_check_B)); 9303 PetscCall(VecDestroy(&dummy_vec)); 9304 PetscCall(MatDestroy(&S_j)); 9305 PetscCall(MatDestroy(&B0_B)); 9306 } 9307 PetscFunctionReturn(0); 9308 } 9309 9310 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9311 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9312 { 9313 Mat At; 9314 IS rows; 9315 PetscInt rst,ren; 9316 PetscLayout rmap; 9317 9318 PetscFunctionBegin; 9319 rst = ren = 0; 9320 if (ccomm != MPI_COMM_NULL) { 9321 PetscCall(PetscLayoutCreate(ccomm,&rmap)); 9322 PetscCall(PetscLayoutSetSize(rmap,A->rmap->N)); 9323 PetscCall(PetscLayoutSetBlockSize(rmap,1)); 9324 PetscCall(PetscLayoutSetUp(rmap)); 9325 PetscCall(PetscLayoutGetRange(rmap,&rst,&ren)); 9326 } 9327 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows)); 9328 PetscCall(MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At)); 9329 PetscCall(ISDestroy(&rows)); 9330 9331 if (ccomm != MPI_COMM_NULL) { 9332 Mat_MPIAIJ *a,*b; 9333 IS from,to; 9334 Vec gvec; 9335 PetscInt lsize; 9336 9337 PetscCall(MatCreate(ccomm,B)); 9338 PetscCall(MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N)); 9339 PetscCall(MatSetType(*B,MATAIJ)); 9340 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 9341 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9342 a = (Mat_MPIAIJ*)At->data; 9343 b = (Mat_MPIAIJ*)(*B)->data; 9344 PetscCallMPI(MPI_Comm_size(ccomm,&b->size)); 9345 PetscCallMPI(MPI_Comm_rank(ccomm,&b->rank)); 9346 PetscCall(PetscObjectReference((PetscObject)a->A)); 9347 PetscCall(PetscObjectReference((PetscObject)a->B)); 9348 b->A = a->A; 9349 b->B = a->B; 9350 9351 b->donotstash = a->donotstash; 9352 b->roworiented = a->roworiented; 9353 b->rowindices = NULL; 9354 b->rowvalues = NULL; 9355 b->getrowactive = PETSC_FALSE; 9356 9357 (*B)->rmap = rmap; 9358 (*B)->factortype = A->factortype; 9359 (*B)->assembled = PETSC_TRUE; 9360 (*B)->insertmode = NOT_SET_VALUES; 9361 (*B)->preallocated = PETSC_TRUE; 9362 9363 if (a->colmap) { 9364 #if defined(PETSC_USE_CTABLE) 9365 PetscCall(PetscTableCreateCopy(a->colmap,&b->colmap)); 9366 #else 9367 PetscCall(PetscMalloc1(At->cmap->N,&b->colmap)); 9368 PetscCall(PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt))); 9369 PetscCall(PetscArraycpy(b->colmap,a->colmap,At->cmap->N)); 9370 #endif 9371 } else b->colmap = NULL; 9372 if (a->garray) { 9373 PetscInt len; 9374 len = a->B->cmap->n; 9375 PetscCall(PetscMalloc1(len+1,&b->garray)); 9376 PetscCall(PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt))); 9377 if (len) PetscCall(PetscArraycpy(b->garray,a->garray,len)); 9378 } else b->garray = NULL; 9379 9380 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9381 b->lvec = a->lvec; 9382 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec)); 9383 9384 /* cannot use VecScatterCopy */ 9385 PetscCall(VecGetLocalSize(b->lvec,&lsize)); 9386 PetscCall(ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from)); 9387 PetscCall(ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to)); 9388 PetscCall(MatCreateVecs(*B,&gvec,NULL)); 9389 PetscCall(VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx)); 9390 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx)); 9391 PetscCall(ISDestroy(&from)); 9392 PetscCall(ISDestroy(&to)); 9393 PetscCall(VecDestroy(&gvec)); 9394 } 9395 PetscCall(MatDestroy(&At)); 9396 PetscFunctionReturn(0); 9397 } 9398