1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 #if defined(PETSC_USE_COMPLEX) 22 PetscReal *rwork2; 23 #endif 24 25 PetscFunctionBegin; 26 PetscCall(MatGetSize(A,&nr,&nc)); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 PetscCall(PetscMalloc1(ulw,&uwork)); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 PetscCall(PetscMalloc1(n,&sing)); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 PetscCall(PetscMalloc1(nr*nr,&U)); 46 PetscCall(PetscBLASIntCast(nr,&bM)); 47 PetscCall(PetscBLASIntCast(nc,&bN)); 48 PetscCall(PetscBLASIntCast(ulw,&lwork)); 49 PetscCall(MatDenseGetArray(A,&data)); 50 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 #if !defined(PETSC_USE_COMPLEX) 52 PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 53 #else 54 PetscCall(PetscMalloc1(5*n,&rwork2)); 55 PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr)); 56 PetscCall(PetscFree(rwork2)); 57 #endif 58 PetscCall(PetscFPTrapPop()); 59 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 60 PetscCall(MatDenseRestoreArray(A,&data)); 61 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 62 if (!rwork) { 63 PetscCall(PetscFree(sing)); 64 } 65 if (!work) { 66 PetscCall(PetscFree(uwork)); 67 } 68 /* create B */ 69 if (!range) { 70 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B)); 71 PetscCall(MatDenseGetArray(*B,&data)); 72 PetscCall(PetscArraycpy(data,U+nr*i,(nr-i)*nr)); 73 } else { 74 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B)); 75 PetscCall(MatDenseGetArray(*B,&data)); 76 PetscCall(PetscArraycpy(data,U,i*nr)); 77 } 78 PetscCall(MatDenseRestoreArray(*B,&data)); 79 PetscCall(PetscFree(U)); 80 PetscFunctionReturn(0); 81 } 82 83 /* TODO REMOVE */ 84 #if defined(PRINT_GDET) 85 static int inc = 0; 86 static int lev = 0; 87 #endif 88 89 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 90 { 91 Mat GE,GEd; 92 PetscInt rsize,csize,esize; 93 PetscScalar *ptr; 94 95 PetscFunctionBegin; 96 PetscCall(ISGetSize(edge,&esize)); 97 if (!esize) PetscFunctionReturn(0); 98 PetscCall(ISGetSize(extrow,&rsize)); 99 PetscCall(ISGetSize(extcol,&csize)); 100 101 /* gradients */ 102 ptr = work + 5*esize; 103 PetscCall(MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE)); 104 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins)); 105 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins)); 106 PetscCall(MatDestroy(&GE)); 107 108 /* constants */ 109 ptr += rsize*csize; 110 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd)); 111 PetscCall(MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE)); 112 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd)); 113 PetscCall(MatDestroy(&GE)); 114 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins)); 115 PetscCall(MatDestroy(&GEd)); 116 117 if (corners) { 118 Mat GEc; 119 const PetscScalar *vals; 120 PetscScalar v; 121 122 PetscCall(MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc)); 123 PetscCall(MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd)); 124 PetscCall(MatDenseGetArrayRead(GEd,&vals)); 125 /* v = PetscAbsScalar(vals[0]) */; 126 v = 1.; 127 cvals[0] = vals[0]/v; 128 cvals[1] = vals[1]/v; 129 PetscCall(MatDenseRestoreArrayRead(GEd,&vals)); 130 PetscCall(MatScale(*GKins,1./v)); 131 #if defined(PRINT_GDET) 132 { 133 PetscViewer viewer; 134 char filename[256]; 135 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 136 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 137 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 138 PetscCall(PetscObjectSetName((PetscObject)GEc,"GEc")); 139 PetscCall(MatView(GEc,viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)(*GKins),"GK")); 141 PetscCall(MatView(*GKins,viewer)); 142 PetscCall(PetscObjectSetName((PetscObject)GEd,"Gproj")); 143 PetscCall(MatView(GEd,viewer)); 144 PetscCall(PetscViewerDestroy(&viewer)); 145 } 146 #endif 147 PetscCall(MatDestroy(&GEd)); 148 PetscCall(MatDestroy(&GEc)); 149 } 150 151 PetscFunctionReturn(0); 152 } 153 154 PetscErrorCode PCBDDCNedelecSupport(PC pc) 155 { 156 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 157 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 158 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 159 Vec tvec; 160 PetscSF sfv; 161 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 162 MPI_Comm comm; 163 IS lned,primals,allprimals,nedfieldlocal; 164 IS *eedges,*extrows,*extcols,*alleedges; 165 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 166 PetscScalar *vals,*work; 167 PetscReal *rwork; 168 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 169 PetscInt ne,nv,Lv,order,n,field; 170 PetscInt n_neigh,*neigh,*n_shared,**shared; 171 PetscInt i,j,extmem,cum,maxsize,nee; 172 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 173 PetscInt *sfvleaves,*sfvroots; 174 PetscInt *corners,*cedges; 175 PetscInt *ecount,**eneighs,*vcount,**vneighs; 176 PetscInt *emarks; 177 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 178 179 PetscFunctionBegin; 180 /* If the discrete gradient is defined for a subset of dofs and global is true, 181 it assumes G is given in global ordering for all the dofs. 182 Otherwise, the ordering is global for the Nedelec field */ 183 order = pcbddc->nedorder; 184 conforming = pcbddc->conforming; 185 field = pcbddc->nedfield; 186 global = pcbddc->nedglobal; 187 setprimal = PETSC_FALSE; 188 print = PETSC_FALSE; 189 singular = PETSC_FALSE; 190 191 /* Command line customization */ 192 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC"); 193 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL)); 194 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL)); 195 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL)); 196 /* print debug info TODO: to be removed */ 197 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL)); 198 PetscOptionsEnd(); 199 200 /* Return if there are no edges in the decomposition and the problem is not singular */ 201 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&al2g,NULL)); 202 PetscCall(ISLocalToGlobalMappingGetSize(al2g,&n)); 203 PetscCall(PetscObjectGetComm((PetscObject)pc,&comm)); 204 if (!singular) { 205 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 206 lrc[0] = PETSC_FALSE; 207 for (i=0;i<n;i++) { 208 if (PetscRealPart(vals[i]) > 2.) { 209 lrc[0] = PETSC_TRUE; 210 break; 211 } 212 } 213 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 214 PetscCall(MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm)); 215 if (!lrc[1]) PetscFunctionReturn(0); 216 } 217 218 /* Get Nedelec field */ 219 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal,comm,PETSC_ERR_USER,"Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT,field,pcbddc->n_ISForDofsLocal); 220 if (pcbddc->n_ISForDofsLocal && field >= 0) { 221 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 222 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 223 PetscCall(ISGetLocalSize(nedfieldlocal,&ne)); 224 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 225 ne = n; 226 nedfieldlocal = NULL; 227 global = PETSC_TRUE; 228 } else if (field == PETSC_DECIDE) { 229 PetscInt rst,ren,*idx; 230 231 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 232 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 233 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren)); 234 for (i=rst;i<ren;i++) { 235 PetscInt nc; 236 237 PetscCall(MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 238 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 239 PetscCall(MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 240 } 241 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 242 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 243 PetscCall(PetscMalloc1(n,&idx)); 244 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 245 PetscCall(ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal)); 246 } else { 247 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 248 } 249 250 /* Sanity checks */ 251 PetscCheck(order || conforming,comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 252 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix,comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 253 PetscCheck(!order || (ne%order == 0),PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT,ne,order); 254 255 /* Just set primal dofs and return */ 256 if (setprimal) { 257 IS enedfieldlocal; 258 PetscInt *eidxs; 259 260 PetscCall(PetscMalloc1(ne,&eidxs)); 261 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 262 if (nedfieldlocal) { 263 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 264 for (i=0,cum=0;i<ne;i++) { 265 if (PetscRealPart(vals[idxs[i]]) > 2.) { 266 eidxs[cum++] = idxs[i]; 267 } 268 } 269 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 270 } else { 271 for (i=0,cum=0;i<ne;i++) { 272 if (PetscRealPart(vals[i]) > 2.) { 273 eidxs[cum++] = i; 274 } 275 } 276 } 277 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 278 PetscCall(ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal)); 279 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal)); 280 PetscCall(PetscFree(eidxs)); 281 PetscCall(ISDestroy(&nedfieldlocal)); 282 PetscCall(ISDestroy(&enedfieldlocal)); 283 PetscFunctionReturn(0); 284 } 285 286 /* Compute some l2g maps */ 287 if (nedfieldlocal) { 288 IS is; 289 290 /* need to map from the local Nedelec field to local numbering */ 291 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g)); 292 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 293 PetscCall(ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is)); 294 PetscCall(ISLocalToGlobalMappingCreateIS(is,&al2g)); 295 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 296 if (global) { 297 PetscCall(PetscObjectReference((PetscObject)al2g)); 298 el2g = al2g; 299 } else { 300 IS gis; 301 302 PetscCall(ISRenumber(is,NULL,NULL,&gis)); 303 PetscCall(ISLocalToGlobalMappingCreateIS(gis,&el2g)); 304 PetscCall(ISDestroy(&gis)); 305 } 306 PetscCall(ISDestroy(&is)); 307 } else { 308 /* restore default */ 309 pcbddc->nedfield = -1; 310 /* one ref for the destruction of al2g, one for el2g */ 311 PetscCall(PetscObjectReference((PetscObject)al2g)); 312 PetscCall(PetscObjectReference((PetscObject)al2g)); 313 el2g = al2g; 314 fl2g = NULL; 315 } 316 317 /* Start communication to drop connections for interior edges (for cc analysis only) */ 318 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 319 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 320 if (nedfieldlocal) { 321 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 322 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 323 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 324 } else { 325 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 326 } 327 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 328 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 329 330 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 331 PetscCall(MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G)); 332 PetscCall(MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 333 if (global) { 334 PetscInt rst; 335 336 PetscCall(MatGetOwnershipRange(G,&rst,NULL)); 337 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 338 if (matis->sf_rootdata[i] < 2) { 339 matis->sf_rootdata[cum++] = i + rst; 340 } 341 } 342 PetscCall(MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE)); 343 PetscCall(MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL)); 344 } else { 345 PetscInt *tbz; 346 347 PetscCall(PetscMalloc1(ne,&tbz)); 348 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 349 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 350 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 351 for (i=0,cum=0;i<ne;i++) 352 if (matis->sf_leafdata[idxs[i]] == 1) 353 tbz[cum++] = i; 354 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 355 PetscCall(ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz)); 356 PetscCall(MatZeroRows(G,cum,tbz,0.,NULL,NULL)); 357 PetscCall(PetscFree(tbz)); 358 } 359 } else { /* we need the entire G to infer the nullspace */ 360 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 361 G = pcbddc->discretegradient; 362 } 363 364 /* Extract subdomain relevant rows of G */ 365 PetscCall(ISLocalToGlobalMappingGetIndices(el2g,&idxs)); 366 PetscCall(ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned)); 367 PetscCall(MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall)); 368 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g,&idxs)); 369 PetscCall(ISDestroy(&lned)); 370 PetscCall(MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis)); 371 PetscCall(MatDestroy(&lGall)); 372 PetscCall(MatISGetLocalMat(lGis,&lG)); 373 374 /* SF for nodal dofs communications */ 375 PetscCall(MatGetLocalSize(G,NULL,&Lv)); 376 PetscCall(MatISGetLocalToGlobalMapping(lGis,NULL,&vl2g)); 377 PetscCall(PetscObjectReference((PetscObject)vl2g)); 378 PetscCall(ISLocalToGlobalMappingGetSize(vl2g,&nv)); 379 PetscCall(PetscSFCreate(comm,&sfv)); 380 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g,&idxs)); 381 PetscCall(PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs)); 382 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs)); 383 i = singular ? 2 : 1; 384 PetscCall(PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots)); 385 386 /* Destroy temporary G created in MATIS format and modified G */ 387 PetscCall(PetscObjectReference((PetscObject)lG)); 388 PetscCall(MatDestroy(&lGis)); 389 PetscCall(MatDestroy(&G)); 390 391 if (print) { 392 PetscCall(PetscObjectSetName((PetscObject)lG,"initial_lG")); 393 PetscCall(MatView(lG,NULL)); 394 } 395 396 /* Save lG for values insertion in change of basis */ 397 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGinit)); 398 399 /* Analyze the edge-nodes connections (duplicate lG) */ 400 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGe)); 401 PetscCall(MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 402 PetscCall(PetscBTCreate(nv,&btv)); 403 PetscCall(PetscBTCreate(ne,&bte)); 404 PetscCall(PetscBTCreate(ne,&btb)); 405 PetscCall(PetscBTCreate(ne,&btbd)); 406 PetscCall(PetscBTCreate(nv,&btvcand)); 407 /* need to import the boundary specification to ensure the 408 proper detection of coarse edges' endpoints */ 409 if (pcbddc->DirichletBoundariesLocal) { 410 IS is; 411 412 if (fl2g) { 413 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is)); 414 } else { 415 is = pcbddc->DirichletBoundariesLocal; 416 } 417 PetscCall(ISGetLocalSize(is,&cum)); 418 PetscCall(ISGetIndices(is,&idxs)); 419 for (i=0;i<cum;i++) { 420 if (idxs[i] >= 0) { 421 PetscCall(PetscBTSet(btb,idxs[i])); 422 PetscCall(PetscBTSet(btbd,idxs[i])); 423 } 424 } 425 PetscCall(ISRestoreIndices(is,&idxs)); 426 if (fl2g) { 427 PetscCall(ISDestroy(&is)); 428 } 429 } 430 if (pcbddc->NeumannBoundariesLocal) { 431 IS is; 432 433 if (fl2g) { 434 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is)); 435 } else { 436 is = pcbddc->NeumannBoundariesLocal; 437 } 438 PetscCall(ISGetLocalSize(is,&cum)); 439 PetscCall(ISGetIndices(is,&idxs)); 440 for (i=0;i<cum;i++) { 441 if (idxs[i] >= 0) { 442 PetscCall(PetscBTSet(btb,idxs[i])); 443 } 444 } 445 PetscCall(ISRestoreIndices(is,&idxs)); 446 if (fl2g) { 447 PetscCall(ISDestroy(&is)); 448 } 449 } 450 451 /* Count neighs per dof */ 452 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs)); 453 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs)); 454 455 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 456 for proper detection of coarse edges' endpoints */ 457 PetscCall(PetscBTCreate(ne,&btee)); 458 for (i=0;i<ne;i++) { 459 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 460 PetscCall(PetscBTSet(btee,i)); 461 } 462 } 463 PetscCall(PetscMalloc1(ne,&marks)); 464 if (!conforming) { 465 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 466 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 467 } 468 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 469 PetscCall(MatSeqAIJGetArray(lGe,&vals)); 470 cum = 0; 471 for (i=0;i<ne;i++) { 472 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 473 if (!PetscBTLookup(btee,i)) { 474 marks[cum++] = i; 475 continue; 476 } 477 /* set badly connected edge dofs as primal */ 478 if (!conforming) { 479 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 480 marks[cum++] = i; 481 PetscCall(PetscBTSet(bte,i)); 482 for (j=ii[i];j<ii[i+1];j++) { 483 PetscCall(PetscBTSet(btv,jj[j])); 484 } 485 } else { 486 /* every edge dofs should be connected trough a certain number of nodal dofs 487 to other edge dofs belonging to coarse edges 488 - at most 2 endpoints 489 - order-1 interior nodal dofs 490 - no undefined nodal dofs (nconn < order) 491 */ 492 PetscInt ends = 0,ints = 0, undef = 0; 493 for (j=ii[i];j<ii[i+1];j++) { 494 PetscInt v = jj[j],k; 495 PetscInt nconn = iit[v+1]-iit[v]; 496 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 497 if (nconn > order) ends++; 498 else if (nconn == order) ints++; 499 else undef++; 500 } 501 if (undef || ends > 2 || ints != order -1) { 502 marks[cum++] = i; 503 PetscCall(PetscBTSet(bte,i)); 504 for (j=ii[i];j<ii[i+1];j++) { 505 PetscCall(PetscBTSet(btv,jj[j])); 506 } 507 } 508 } 509 } 510 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 511 if (!order && ii[i+1] != ii[i]) { 512 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 513 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 514 } 515 } 516 PetscCall(PetscBTDestroy(&btee)); 517 PetscCall(MatSeqAIJRestoreArray(lGe,&vals)); 518 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 519 if (!conforming) { 520 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 521 PetscCall(MatDestroy(&lGt)); 522 } 523 PetscCall(MatZeroRows(lGe,cum,marks,0.,NULL,NULL)); 524 525 /* identify splitpoints and corner candidates */ 526 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 527 if (print) { 528 PetscCall(PetscObjectSetName((PetscObject)lGe,"edgerestr_lG")); 529 PetscCall(MatView(lGe,NULL)); 530 PetscCall(PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt")); 531 PetscCall(MatView(lGt,NULL)); 532 } 533 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 534 PetscCall(MatSeqAIJGetArray(lGt,&vals)); 535 for (i=0;i<nv;i++) { 536 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 537 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 538 if (!order) { /* variable order */ 539 PetscReal vorder = 0.; 540 541 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 542 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 543 PetscCheck(vorder-test <= PETSC_SQRT_MACHINE_EPSILON,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%" PetscInt_FMT ")",(double)vorder,test); 544 ord = 1; 545 } 546 PetscAssert(test%ord == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT,test,i,ord); 547 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 548 if (PetscBTLookup(btbd,jj[j])) { 549 bdir = PETSC_TRUE; 550 break; 551 } 552 if (vc != ecount[jj[j]]) { 553 sneighs = PETSC_FALSE; 554 } else { 555 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 556 for (k=0;k<vc;k++) { 557 if (vn[k] != en[k]) { 558 sneighs = PETSC_FALSE; 559 break; 560 } 561 } 562 } 563 } 564 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 565 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n",i,PetscBools[!sneighs],PetscBools[test >= 3*ord],PetscBools[bdir]); 566 PetscCall(PetscBTSet(btv,i)); 567 } else if (test == ord) { 568 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 569 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %" PetscInt_FMT "\n",i); 570 PetscCall(PetscBTSet(btv,i)); 571 } else { 572 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %" PetscInt_FMT "\n",i); 573 PetscCall(PetscBTSet(btvcand,i)); 574 } 575 } 576 } 577 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs)); 578 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs)); 579 PetscCall(PetscBTDestroy(&btbd)); 580 581 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 582 if (order != 1) { 583 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 584 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 585 for (i=0;i<nv;i++) { 586 if (PetscBTLookup(btvcand,i)) { 587 PetscBool found = PETSC_FALSE; 588 for (j=ii[i];j<ii[i+1] && !found;j++) { 589 PetscInt k,e = jj[j]; 590 if (PetscBTLookup(bte,e)) continue; 591 for (k=iit[e];k<iit[e+1];k++) { 592 PetscInt v = jjt[k]; 593 if (v != i && PetscBTLookup(btvcand,v)) { 594 found = PETSC_TRUE; 595 break; 596 } 597 } 598 } 599 if (!found) { 600 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %" PetscInt_FMT " CLEARED\n",i); 601 PetscCall(PetscBTClear(btvcand,i)); 602 } else { 603 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %" PetscInt_FMT " ACCEPTED\n",i); 604 } 605 } 606 } 607 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 608 } 609 PetscCall(MatSeqAIJRestoreArray(lGt,&vals)); 610 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 611 PetscCall(MatDestroy(&lGe)); 612 613 /* Get the local G^T explicitly */ 614 PetscCall(MatDestroy(&lGt)); 615 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 616 PetscCall(MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 617 618 /* Mark interior nodal dofs */ 619 PetscCall(ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 620 PetscCall(PetscBTCreate(nv,&btvi)); 621 for (i=1;i<n_neigh;i++) { 622 for (j=0;j<n_shared[i];j++) { 623 PetscCall(PetscBTSet(btvi,shared[i][j])); 624 } 625 } 626 PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 627 628 /* communicate corners and splitpoints */ 629 PetscCall(PetscMalloc1(nv,&vmarks)); 630 PetscCall(PetscArrayzero(sfvleaves,nv)); 631 PetscCall(PetscArrayzero(sfvroots,Lv)); 632 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 633 634 if (print) { 635 IS tbz; 636 637 cum = 0; 638 for (i=0;i<nv;i++) 639 if (sfvleaves[i]) 640 vmarks[cum++] = i; 641 642 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 643 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local")); 644 PetscCall(ISView(tbz,NULL)); 645 PetscCall(ISDestroy(&tbz)); 646 } 647 648 PetscCall(PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 649 PetscCall(PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 650 PetscCall(PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 651 PetscCall(PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 652 653 /* Zero rows of lGt corresponding to identified corners 654 and interior nodal dofs */ 655 cum = 0; 656 for (i=0;i<nv;i++) { 657 if (sfvleaves[i]) { 658 vmarks[cum++] = i; 659 PetscCall(PetscBTSet(btv,i)); 660 } 661 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 662 } 663 PetscCall(PetscBTDestroy(&btvi)); 664 if (print) { 665 IS tbz; 666 667 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 668 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior")); 669 PetscCall(ISView(tbz,NULL)); 670 PetscCall(ISDestroy(&tbz)); 671 } 672 PetscCall(MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL)); 673 PetscCall(PetscFree(vmarks)); 674 PetscCall(PetscSFDestroy(&sfv)); 675 PetscCall(PetscFree2(sfvleaves,sfvroots)); 676 677 /* Recompute G */ 678 PetscCall(MatDestroy(&lG)); 679 PetscCall(MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG)); 680 if (print) { 681 PetscCall(PetscObjectSetName((PetscObject)lG,"used_lG")); 682 PetscCall(MatView(lG,NULL)); 683 PetscCall(PetscObjectSetName((PetscObject)lGt,"used_lGt")); 684 PetscCall(MatView(lGt,NULL)); 685 } 686 687 /* Get primal dofs (if any) */ 688 cum = 0; 689 for (i=0;i<ne;i++) { 690 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 691 } 692 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,marks,marks)); 693 PetscCall(ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals)); 694 if (print) { 695 PetscCall(PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs")); 696 PetscCall(ISView(primals,NULL)); 697 } 698 PetscCall(PetscBTDestroy(&bte)); 699 /* TODO: what if the user passed in some of them ? */ 700 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 701 PetscCall(ISDestroy(&primals)); 702 703 /* Compute edge connectivity */ 704 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_")); 705 706 /* Symbolic conn = lG*lGt */ 707 PetscCall(MatProductCreate(lG,lGt,NULL,&conn)); 708 PetscCall(MatProductSetType(conn,MATPRODUCT_AB)); 709 PetscCall(MatProductSetAlgorithm(conn,"default")); 710 PetscCall(MatProductSetFill(conn,PETSC_DEFAULT)); 711 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_")); 712 PetscCall(MatProductSetFromOptions(conn)); 713 PetscCall(MatProductSymbolic(conn)); 714 715 PetscCall(MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 716 if (fl2g) { 717 PetscBT btf; 718 PetscInt *iia,*jja,*iiu,*jju; 719 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 720 721 /* create CSR for all local dofs */ 722 PetscCall(PetscMalloc1(n+1,&iia)); 723 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 724 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT,pcbddc->mat_graph->nvtxs_csr,n); 725 iiu = pcbddc->mat_graph->xadj; 726 jju = pcbddc->mat_graph->adjncy; 727 } else if (pcbddc->use_local_adj) { 728 rest = PETSC_TRUE; 729 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 730 } else { 731 free = PETSC_TRUE; 732 PetscCall(PetscMalloc2(n+1,&iiu,n,&jju)); 733 iiu[0] = 0; 734 for (i=0;i<n;i++) { 735 iiu[i+1] = i+1; 736 jju[i] = -1; 737 } 738 } 739 740 /* import sizes of CSR */ 741 iia[0] = 0; 742 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 743 744 /* overwrite entries corresponding to the Nedelec field */ 745 PetscCall(PetscBTCreate(n,&btf)); 746 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 747 for (i=0;i<ne;i++) { 748 PetscCall(PetscBTSet(btf,idxs[i])); 749 iia[idxs[i]+1] = ii[i+1]-ii[i]; 750 } 751 752 /* iia in CSR */ 753 for (i=0;i<n;i++) iia[i+1] += iia[i]; 754 755 /* jja in CSR */ 756 PetscCall(PetscMalloc1(iia[n],&jja)); 757 for (i=0;i<n;i++) 758 if (!PetscBTLookup(btf,i)) 759 for (j=0;j<iiu[i+1]-iiu[i];j++) 760 jja[iia[i]+j] = jju[iiu[i]+j]; 761 762 /* map edge dofs connectivity */ 763 if (jj) { 764 PetscCall(ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj)); 765 for (i=0;i<ne;i++) { 766 PetscInt e = idxs[i]; 767 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 768 } 769 } 770 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 771 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER)); 772 if (rest) { 773 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 774 } 775 if (free) PetscCall(PetscFree2(iiu,jju)); 776 PetscCall(PetscBTDestroy(&btf)); 777 } else { 778 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER)); 779 } 780 781 /* Analyze interface for edge dofs */ 782 PetscCall(PCBDDCAnalyzeInterface(pc)); 783 pcbddc->mat_graph->twodim = PETSC_FALSE; 784 785 /* Get coarse edges in the edge space */ 786 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 787 PetscCall(MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 788 789 if (fl2g) { 790 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 791 PetscCall(PetscMalloc1(nee,&eedges)); 792 for (i=0;i<nee;i++) { 793 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 794 } 795 } else { 796 eedges = alleedges; 797 primals = allprimals; 798 } 799 800 /* Mark fine edge dofs with their coarse edge id */ 801 PetscCall(PetscArrayzero(marks,ne)); 802 PetscCall(ISGetLocalSize(primals,&cum)); 803 PetscCall(ISGetIndices(primals,&idxs)); 804 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 805 PetscCall(ISRestoreIndices(primals,&idxs)); 806 if (print) { 807 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs")); 808 PetscCall(ISView(primals,NULL)); 809 } 810 811 maxsize = 0; 812 for (i=0;i<nee;i++) { 813 PetscInt size,mark = i+1; 814 815 PetscCall(ISGetLocalSize(eedges[i],&size)); 816 PetscCall(ISGetIndices(eedges[i],&idxs)); 817 for (j=0;j<size;j++) marks[idxs[j]] = mark; 818 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 819 maxsize = PetscMax(maxsize,size); 820 } 821 822 /* Find coarse edge endpoints */ 823 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 824 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 825 for (i=0;i<nee;i++) { 826 PetscInt mark = i+1,size; 827 828 PetscCall(ISGetLocalSize(eedges[i],&size)); 829 if (!size && nedfieldlocal) continue; 830 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 831 PetscCall(ISGetIndices(eedges[i],&idxs)); 832 if (print) { 833 PetscCall(PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n",i)); 834 PetscCall(ISView(eedges[i],NULL)); 835 } 836 for (j=0;j<size;j++) { 837 PetscInt k, ee = idxs[j]; 838 if (print) PetscPrintf(PETSC_COMM_SELF," idx %" PetscInt_FMT "\n",ee); 839 for (k=ii[ee];k<ii[ee+1];k++) { 840 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %" PetscInt_FMT "\n",jj[k]); 841 if (PetscBTLookup(btv,jj[k])) { 842 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %" PetscInt_FMT "\n",jj[k]); 843 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 844 PetscInt k2; 845 PetscBool corner = PETSC_FALSE; 846 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 847 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,(int)!!PetscBTLookup(btb,jjt[k2])); 848 /* it's a corner if either is connected with an edge dof belonging to a different cc or 849 if the edge dof lie on the natural part of the boundary */ 850 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 851 corner = PETSC_TRUE; 852 break; 853 } 854 } 855 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 856 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %" PetscInt_FMT "\n",jj[k]); 857 PetscCall(PetscBTSet(btv,jj[k])); 858 } else { 859 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 860 } 861 } 862 } 863 } 864 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 865 } 866 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 867 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 868 PetscCall(PetscBTDestroy(&btb)); 869 870 /* Reset marked primal dofs */ 871 PetscCall(ISGetLocalSize(primals,&cum)); 872 PetscCall(ISGetIndices(primals,&idxs)); 873 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 874 PetscCall(ISRestoreIndices(primals,&idxs)); 875 876 /* Now use the initial lG */ 877 PetscCall(MatDestroy(&lG)); 878 PetscCall(MatDestroy(&lGt)); 879 lG = lGinit; 880 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 881 882 /* Compute extended cols indices */ 883 PetscCall(PetscBTCreate(nv,&btvc)); 884 PetscCall(PetscBTCreate(nee,&bter)); 885 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 886 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG,&i)); 887 i *= maxsize; 888 PetscCall(PetscCalloc1(nee,&extcols)); 889 PetscCall(PetscMalloc2(i,&extrow,i,&gidxs)); 890 eerr = PETSC_FALSE; 891 for (i=0;i<nee;i++) { 892 PetscInt size,found = 0; 893 894 cum = 0; 895 PetscCall(ISGetLocalSize(eedges[i],&size)); 896 if (!size && nedfieldlocal) continue; 897 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 898 PetscCall(ISGetIndices(eedges[i],&idxs)); 899 PetscCall(PetscBTMemzero(nv,btvc)); 900 for (j=0;j<size;j++) { 901 PetscInt k,ee = idxs[j]; 902 for (k=ii[ee];k<ii[ee+1];k++) { 903 PetscInt vv = jj[k]; 904 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 905 else if (!PetscBTLookupSet(btvc,vv)) found++; 906 } 907 } 908 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 909 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 910 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 911 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 912 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 913 /* it may happen that endpoints are not defined at this point 914 if it is the case, mark this edge for a second pass */ 915 if (cum != size -1 || found != 2) { 916 PetscCall(PetscBTSet(bter,i)); 917 if (print) { 918 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge")); 919 PetscCall(ISView(eedges[i],NULL)); 920 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol")); 921 PetscCall(ISView(extcols[i],NULL)); 922 } 923 eerr = PETSC_TRUE; 924 } 925 } 926 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 927 PetscCall(MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm)); 928 if (done) { 929 PetscInt *newprimals; 930 931 PetscCall(PetscMalloc1(ne,&newprimals)); 932 PetscCall(ISGetLocalSize(primals,&cum)); 933 PetscCall(ISGetIndices(primals,&idxs)); 934 PetscCall(PetscArraycpy(newprimals,idxs,cum)); 935 PetscCall(ISRestoreIndices(primals,&idxs)); 936 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 937 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %s)\n",PetscBools[eerr]); 938 for (i=0;i<nee;i++) { 939 PetscBool has_candidates = PETSC_FALSE; 940 if (PetscBTLookup(bter,i)) { 941 PetscInt size,mark = i+1; 942 943 PetscCall(ISGetLocalSize(eedges[i],&size)); 944 PetscCall(ISGetIndices(eedges[i],&idxs)); 945 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 946 for (j=0;j<size;j++) { 947 PetscInt k,ee = idxs[j]; 948 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n",ee,ii[ee],ii[ee+1]); 949 for (k=ii[ee];k<ii[ee+1];k++) { 950 /* set all candidates located on the edge as corners */ 951 if (PetscBTLookup(btvcand,jj[k])) { 952 PetscInt k2,vv = jj[k]; 953 has_candidates = PETSC_TRUE; 954 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %" PetscInt_FMT "\n",vv); 955 PetscCall(PetscBTSet(btv,vv)); 956 /* set all edge dofs connected to candidate as primals */ 957 for (k2=iit[vv];k2<iit[vv+1];k2++) { 958 if (marks[jjt[k2]] == mark) { 959 PetscInt k3,ee2 = jjt[k2]; 960 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %" PetscInt_FMT "\n",ee2); 961 newprimals[cum++] = ee2; 962 /* finally set the new corners */ 963 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 964 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %" PetscInt_FMT "\n",jj[k3]); 965 PetscCall(PetscBTSet(btv,jj[k3])); 966 } 967 } 968 } 969 } else { 970 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %" PetscInt_FMT "\n",jj[k]); 971 } 972 } 973 } 974 if (!has_candidates) { /* circular edge */ 975 PetscInt k, ee = idxs[0],*tmarks; 976 977 PetscCall(PetscCalloc1(ne,&tmarks)); 978 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %" PetscInt_FMT "\n",i); 979 for (k=ii[ee];k<ii[ee+1];k++) { 980 PetscInt k2; 981 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %" PetscInt_FMT "\n",jj[k]); 982 PetscCall(PetscBTSet(btv,jj[k])); 983 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 984 } 985 for (j=0;j<size;j++) { 986 if (tmarks[idxs[j]] > 1) { 987 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %" PetscInt_FMT "\n",idxs[j]); 988 newprimals[cum++] = idxs[j]; 989 } 990 } 991 PetscCall(PetscFree(tmarks)); 992 } 993 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 994 } 995 PetscCall(ISDestroy(&extcols[i])); 996 } 997 PetscCall(PetscFree(extcols)); 998 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 999 PetscCall(PetscSortRemoveDupsInt(&cum,newprimals)); 1000 if (fl2g) { 1001 PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals)); 1002 PetscCall(ISDestroy(&primals)); 1003 for (i=0;i<nee;i++) { 1004 PetscCall(ISDestroy(&eedges[i])); 1005 } 1006 PetscCall(PetscFree(eedges)); 1007 } 1008 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1009 PetscCall(ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals)); 1010 PetscCall(PetscFree(newprimals)); 1011 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 1012 PetscCall(ISDestroy(&primals)); 1013 PetscCall(PCBDDCAnalyzeInterface(pc)); 1014 pcbddc->mat_graph->twodim = PETSC_FALSE; 1015 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1016 if (fl2g) { 1017 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 1018 PetscCall(PetscMalloc1(nee,&eedges)); 1019 for (i=0;i<nee;i++) { 1020 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 1021 } 1022 } else { 1023 eedges = alleedges; 1024 primals = allprimals; 1025 } 1026 PetscCall(PetscCalloc1(nee,&extcols)); 1027 1028 /* Mark again */ 1029 PetscCall(PetscArrayzero(marks,ne)); 1030 for (i=0;i<nee;i++) { 1031 PetscInt size,mark = i+1; 1032 1033 PetscCall(ISGetLocalSize(eedges[i],&size)); 1034 PetscCall(ISGetIndices(eedges[i],&idxs)); 1035 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1036 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1037 } 1038 if (print) { 1039 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass")); 1040 PetscCall(ISView(primals,NULL)); 1041 } 1042 1043 /* Recompute extended cols */ 1044 eerr = PETSC_FALSE; 1045 for (i=0;i<nee;i++) { 1046 PetscInt size; 1047 1048 cum = 0; 1049 PetscCall(ISGetLocalSize(eedges[i],&size)); 1050 if (!size && nedfieldlocal) continue; 1051 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 1052 PetscCall(ISGetIndices(eedges[i],&idxs)); 1053 for (j=0;j<size;j++) { 1054 PetscInt k,ee = idxs[j]; 1055 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1056 } 1057 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1058 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 1059 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 1060 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 1061 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 1062 if (cum != size -1) { 1063 if (print) { 1064 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass")); 1065 PetscCall(ISView(eedges[i],NULL)); 1066 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass")); 1067 PetscCall(ISView(extcols[i],NULL)); 1068 } 1069 eerr = PETSC_TRUE; 1070 } 1071 } 1072 } 1073 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1074 PetscCall(PetscFree2(extrow,gidxs)); 1075 PetscCall(PetscBTDestroy(&bter)); 1076 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF)); 1077 /* an error should not occur at this point */ 1078 PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1079 1080 /* Check the number of endpoints */ 1081 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1082 PetscCall(PetscMalloc1(2*nee,&corners)); 1083 PetscCall(PetscMalloc1(nee,&cedges)); 1084 for (i=0;i<nee;i++) { 1085 PetscInt size, found = 0, gc[2]; 1086 1087 /* init with defaults */ 1088 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1089 PetscCall(ISGetLocalSize(eedges[i],&size)); 1090 if (!size && nedfieldlocal) continue; 1091 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 1092 PetscCall(ISGetIndices(eedges[i],&idxs)); 1093 PetscCall(PetscBTMemzero(nv,btvc)); 1094 for (j=0;j<size;j++) { 1095 PetscInt k,ee = idxs[j]; 1096 for (k=ii[ee];k<ii[ee+1];k++) { 1097 PetscInt vv = jj[k]; 1098 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1099 PetscCheck(found != 2,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %" PetscInt_FMT,i); 1100 corners[i*2+found++] = vv; 1101 } 1102 } 1103 } 1104 if (found != 2) { 1105 PetscInt e; 1106 if (fl2g) { 1107 PetscCall(ISLocalToGlobalMappingApply(fl2g,1,idxs,&e)); 1108 } else { 1109 e = idxs[0]; 1110 } 1111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")",found,i,e,idxs[0]); 1112 } 1113 1114 /* get primal dof index on this coarse edge */ 1115 PetscCall(ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc)); 1116 if (gc[0] > gc[1]) { 1117 PetscInt swap = corners[2*i]; 1118 corners[2*i] = corners[2*i+1]; 1119 corners[2*i+1] = swap; 1120 } 1121 cedges[i] = idxs[size-1]; 1122 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1123 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1124 } 1125 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1126 PetscCall(PetscBTDestroy(&btvc)); 1127 1128 if (PetscDefined(USE_DEBUG)) { 1129 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1130 not interfere with neighbouring coarse edges */ 1131 PetscCall(PetscMalloc1(nee+1,&emarks)); 1132 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1133 for (i=0;i<nv;i++) { 1134 PetscInt emax = 0,eemax = 0; 1135 1136 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1137 PetscCall(PetscArrayzero(emarks,nee+1)); 1138 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1139 for (j=1;j<nee+1;j++) { 1140 if (emax < emarks[j]) { 1141 emax = emarks[j]; 1142 eemax = j; 1143 } 1144 } 1145 /* not relevant for edges */ 1146 if (!eemax) continue; 1147 1148 for (j=ii[i];j<ii[i+1];j++) { 1149 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax,PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT,marks[jj[j]]-1,eemax,i,jj[j]); 1150 } 1151 } 1152 PetscCall(PetscFree(emarks)); 1153 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1154 } 1155 1156 /* Compute extended rows indices for edge blocks of the change of basis */ 1157 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1158 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt,&extmem)); 1159 extmem *= maxsize; 1160 PetscCall(PetscMalloc1(extmem*nee,&extrow)); 1161 PetscCall(PetscMalloc1(nee,&extrows)); 1162 PetscCall(PetscCalloc1(nee,&extrowcum)); 1163 for (i=0;i<nv;i++) { 1164 PetscInt mark = 0,size,start; 1165 1166 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1167 for (j=ii[i];j<ii[i+1];j++) 1168 if (marks[jj[j]] && !mark) 1169 mark = marks[jj[j]]; 1170 1171 /* not relevant */ 1172 if (!mark) continue; 1173 1174 /* import extended row */ 1175 mark--; 1176 start = mark*extmem+extrowcum[mark]; 1177 size = ii[i+1]-ii[i]; 1178 PetscCheck(extrowcum[mark] + size <= extmem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT,extrowcum[mark] + size,extmem); 1179 PetscCall(PetscArraycpy(extrow+start,jj+ii[i],size)); 1180 extrowcum[mark] += size; 1181 } 1182 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1183 PetscCall(MatDestroy(&lGt)); 1184 PetscCall(PetscFree(marks)); 1185 1186 /* Compress extrows */ 1187 cum = 0; 1188 for (i=0;i<nee;i++) { 1189 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1190 PetscCall(PetscSortRemoveDupsInt(&size,start)); 1191 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i])); 1192 cum = PetscMax(cum,size); 1193 } 1194 PetscCall(PetscFree(extrowcum)); 1195 PetscCall(PetscBTDestroy(&btv)); 1196 PetscCall(PetscBTDestroy(&btvcand)); 1197 1198 /* Workspace for lapack inner calls and VecSetValues */ 1199 PetscCall(PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork)); 1200 1201 /* Create change of basis matrix (preallocation can be improved) */ 1202 PetscCall(MatCreate(comm,&T)); 1203 PetscCall(MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,pc->pmat->rmap->N,pc->pmat->rmap->N)); 1204 PetscCall(MatSetType(T,MATAIJ)); 1205 PetscCall(MatSeqAIJSetPreallocation(T,10,NULL)); 1206 PetscCall(MatMPIAIJSetPreallocation(T,10,NULL,10,NULL)); 1207 PetscCall(MatSetLocalToGlobalMapping(T,al2g,al2g)); 1208 PetscCall(MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 1209 PetscCall(MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE)); 1210 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1211 1212 /* Defaults to identity */ 1213 PetscCall(MatCreateVecs(pc->pmat,&tvec,NULL)); 1214 PetscCall(VecSet(tvec,1.0)); 1215 PetscCall(MatDiagonalSet(T,tvec,INSERT_VALUES)); 1216 PetscCall(VecDestroy(&tvec)); 1217 1218 /* Create discrete gradient for the coarser level if needed */ 1219 PetscCall(MatDestroy(&pcbddc->nedcG)); 1220 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1221 if (pcbddc->current_level < pcbddc->max_levels) { 1222 ISLocalToGlobalMapping cel2g,cvl2g; 1223 IS wis,gwis; 1224 PetscInt cnv,cne; 1225 1226 PetscCall(ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis)); 1227 if (fl2g) { 1228 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal)); 1229 } else { 1230 PetscCall(PetscObjectReference((PetscObject)wis)); 1231 pcbddc->nedclocal = wis; 1232 } 1233 PetscCall(ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis)); 1234 PetscCall(ISDestroy(&wis)); 1235 PetscCall(ISRenumber(gwis,NULL,&cne,&wis)); 1236 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cel2g)); 1237 PetscCall(ISDestroy(&wis)); 1238 PetscCall(ISDestroy(&gwis)); 1239 1240 PetscCall(ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis)); 1241 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis)); 1242 PetscCall(ISDestroy(&wis)); 1243 PetscCall(ISRenumber(gwis,NULL,&cnv,&wis)); 1244 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cvl2g)); 1245 PetscCall(ISDestroy(&wis)); 1246 PetscCall(ISDestroy(&gwis)); 1247 1248 PetscCall(MatCreate(comm,&pcbddc->nedcG)); 1249 PetscCall(MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv)); 1250 PetscCall(MatSetType(pcbddc->nedcG,MATAIJ)); 1251 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL)); 1252 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL)); 1253 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g)); 1254 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1255 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1256 } 1257 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1258 1259 #if defined(PRINT_GDET) 1260 inc = 0; 1261 lev = pcbddc->current_level; 1262 #endif 1263 1264 /* Insert values in the change of basis matrix */ 1265 for (i=0;i<nee;i++) { 1266 Mat Gins = NULL, GKins = NULL; 1267 IS cornersis = NULL; 1268 PetscScalar cvals[2]; 1269 1270 if (pcbddc->nedcG) { 1271 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis)); 1272 } 1273 PetscCall(PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork)); 1274 if (Gins && GKins) { 1275 const PetscScalar *data; 1276 const PetscInt *rows,*cols; 1277 PetscInt nrh,nch,nrc,ncc; 1278 1279 PetscCall(ISGetIndices(eedges[i],&cols)); 1280 /* H1 */ 1281 PetscCall(ISGetIndices(extrows[i],&rows)); 1282 PetscCall(MatGetSize(Gins,&nrh,&nch)); 1283 PetscCall(MatDenseGetArrayRead(Gins,&data)); 1284 PetscCall(MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES)); 1285 PetscCall(MatDenseRestoreArrayRead(Gins,&data)); 1286 PetscCall(ISRestoreIndices(extrows[i],&rows)); 1287 /* complement */ 1288 PetscCall(MatGetSize(GKins,&nrc,&ncc)); 1289 PetscCheck(ncc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %" PetscInt_FMT,i); 1290 PetscCheck(ncc + nch == nrc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT,ncc,nch,nrc,i); 1291 PetscCheck(ncc == 1 || !pcbddc->nedcG,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT,i,ncc); 1292 PetscCall(MatDenseGetArrayRead(GKins,&data)); 1293 PetscCall(MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES)); 1294 PetscCall(MatDenseRestoreArrayRead(GKins,&data)); 1295 1296 /* coarse discrete gradient */ 1297 if (pcbddc->nedcG) { 1298 PetscInt cols[2]; 1299 1300 cols[0] = 2*i; 1301 cols[1] = 2*i+1; 1302 PetscCall(MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES)); 1303 } 1304 PetscCall(ISRestoreIndices(eedges[i],&cols)); 1305 } 1306 PetscCall(ISDestroy(&extrows[i])); 1307 PetscCall(ISDestroy(&extcols[i])); 1308 PetscCall(ISDestroy(&cornersis)); 1309 PetscCall(MatDestroy(&Gins)); 1310 PetscCall(MatDestroy(&GKins)); 1311 } 1312 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1313 1314 /* Start assembling */ 1315 PetscCall(MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY)); 1316 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1317 1318 /* Free */ 1319 if (fl2g) { 1320 PetscCall(ISDestroy(&primals)); 1321 for (i=0;i<nee;i++) { 1322 PetscCall(ISDestroy(&eedges[i])); 1323 } 1324 PetscCall(PetscFree(eedges)); 1325 } 1326 1327 /* hack mat_graph with primal dofs on the coarse edges */ 1328 { 1329 PCBDDCGraph graph = pcbddc->mat_graph; 1330 PetscInt *oqueue = graph->queue; 1331 PetscInt *ocptr = graph->cptr; 1332 PetscInt ncc,*idxs; 1333 1334 /* find first primal edge */ 1335 if (pcbddc->nedclocal) { 1336 PetscCall(ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1337 } else { 1338 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges)); 1339 idxs = cedges; 1340 } 1341 cum = 0; 1342 while (cum < nee && cedges[cum] < 0) cum++; 1343 1344 /* adapt connected components */ 1345 PetscCall(PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue)); 1346 graph->cptr[0] = 0; 1347 for (i=0,ncc=0;i<graph->ncc;i++) { 1348 PetscInt lc = ocptr[i+1]-ocptr[i]; 1349 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1350 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1351 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1352 ncc++; 1353 lc--; 1354 cum++; 1355 while (cum < nee && cedges[cum] < 0) cum++; 1356 } 1357 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1358 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1359 ncc++; 1360 } 1361 graph->ncc = ncc; 1362 if (pcbddc->nedclocal) { 1363 PetscCall(ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1364 } 1365 PetscCall(PetscFree2(ocptr,oqueue)); 1366 } 1367 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1368 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1369 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1370 PetscCall(MatDestroy(&conn)); 1371 1372 PetscCall(ISDestroy(&nedfieldlocal)); 1373 PetscCall(PetscFree(extrow)); 1374 PetscCall(PetscFree2(work,rwork)); 1375 PetscCall(PetscFree(corners)); 1376 PetscCall(PetscFree(cedges)); 1377 PetscCall(PetscFree(extrows)); 1378 PetscCall(PetscFree(extcols)); 1379 PetscCall(MatDestroy(&lG)); 1380 1381 /* Complete assembling */ 1382 PetscCall(MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY)); 1383 if (pcbddc->nedcG) { 1384 PetscCall(MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1385 #if 0 1386 PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1387 PetscCall(MatView(pcbddc->nedcG,NULL)); 1388 #endif 1389 } 1390 1391 /* set change of basis */ 1392 PetscCall(PCBDDCSetChangeOfBasisMat(pc,T,singular)); 1393 PetscCall(MatDestroy(&T)); 1394 1395 PetscFunctionReturn(0); 1396 } 1397 1398 /* the near-null space of BDDC carries information on quadrature weights, 1399 and these can be collinear -> so cheat with MatNullSpaceCreate 1400 and create a suitable set of basis vectors first */ 1401 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1402 { 1403 PetscInt i; 1404 1405 PetscFunctionBegin; 1406 for (i=0;i<nvecs;i++) { 1407 PetscInt first,last; 1408 1409 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1410 PetscCheck(last-first >= 2*nvecs || !has_const,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1411 if (i>=first && i < last) { 1412 PetscScalar *data; 1413 PetscCall(VecGetArray(quad_vecs[i],&data)); 1414 if (!has_const) { 1415 data[i-first] = 1.; 1416 } else { 1417 data[2*i-first] = 1./PetscSqrtReal(2.); 1418 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1419 } 1420 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1421 } 1422 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1423 } 1424 PetscCall(MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp)); 1425 for (i=0;i<nvecs;i++) { /* reset vectors */ 1426 PetscInt first,last; 1427 PetscCall(VecLockReadPop(quad_vecs[i])); 1428 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1429 if (i>=first && i < last) { 1430 PetscScalar *data; 1431 PetscCall(VecGetArray(quad_vecs[i],&data)); 1432 if (!has_const) { 1433 data[i-first] = 0.; 1434 } else { 1435 data[2*i-first] = 0.; 1436 data[2*i-first+1] = 0.; 1437 } 1438 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1439 } 1440 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1441 PetscCall(VecLockReadPush(quad_vecs[i])); 1442 } 1443 PetscFunctionReturn(0); 1444 } 1445 1446 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1447 { 1448 Mat loc_divudotp; 1449 Vec p,v,vins,quad_vec,*quad_vecs; 1450 ISLocalToGlobalMapping map; 1451 PetscScalar *vals; 1452 const PetscScalar *array; 1453 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1454 PetscInt n_neigh,*neigh,*n_shared,**shared; 1455 PetscMPIInt rank; 1456 1457 PetscFunctionBegin; 1458 PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1459 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1460 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A))); 1461 if (!maxneighs) { 1462 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1463 *nnsp = NULL; 1464 PetscFunctionReturn(0); 1465 } 1466 maxsize = 0; 1467 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1468 PetscCall(PetscMalloc2(maxsize,&gidxs,maxsize,&vals)); 1469 /* create vectors to hold quadrature weights */ 1470 PetscCall(MatCreateVecs(A,&quad_vec,NULL)); 1471 if (!transpose) { 1472 PetscCall(MatISGetLocalToGlobalMapping(A,&map,NULL)); 1473 } else { 1474 PetscCall(MatISGetLocalToGlobalMapping(A,NULL,&map)); 1475 } 1476 PetscCall(VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs)); 1477 PetscCall(VecDestroy(&quad_vec)); 1478 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp)); 1479 for (i=0;i<maxneighs;i++) { 1480 PetscCall(VecLockReadPop(quad_vecs[i])); 1481 } 1482 1483 /* compute local quad vec */ 1484 PetscCall(MatISGetLocalMat(divudotp,&loc_divudotp)); 1485 if (!transpose) { 1486 PetscCall(MatCreateVecs(loc_divudotp,&v,&p)); 1487 } else { 1488 PetscCall(MatCreateVecs(loc_divudotp,&p,&v)); 1489 } 1490 PetscCall(VecSet(p,1.)); 1491 if (!transpose) { 1492 PetscCall(MatMultTranspose(loc_divudotp,p,v)); 1493 } else { 1494 PetscCall(MatMult(loc_divudotp,p,v)); 1495 } 1496 if (vl2l) { 1497 Mat lA; 1498 VecScatter sc; 1499 1500 PetscCall(MatISGetLocalMat(A,&lA)); 1501 PetscCall(MatCreateVecs(lA,&vins,NULL)); 1502 PetscCall(VecScatterCreate(v,NULL,vins,vl2l,&sc)); 1503 PetscCall(VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1504 PetscCall(VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1505 PetscCall(VecScatterDestroy(&sc)); 1506 } else { 1507 vins = v; 1508 } 1509 PetscCall(VecGetArrayRead(vins,&array)); 1510 PetscCall(VecDestroy(&p)); 1511 1512 /* insert in global quadrature vecs */ 1513 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 1514 for (i=1;i<n_neigh;i++) { 1515 const PetscInt *idxs; 1516 PetscInt idx,nn,j; 1517 1518 idxs = shared[i]; 1519 nn = n_shared[i]; 1520 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1521 PetscCall(PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx)); 1522 idx = -(idx+1); 1523 PetscCheck(idx >= 0 && idx < maxneighs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")",idx,maxneighs); 1524 PetscCall(ISLocalToGlobalMappingApply(map,nn,idxs,gidxs)); 1525 PetscCall(VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES)); 1526 } 1527 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1528 PetscCall(VecRestoreArrayRead(vins,&array)); 1529 if (vl2l) { 1530 PetscCall(VecDestroy(&vins)); 1531 } 1532 PetscCall(VecDestroy(&v)); 1533 PetscCall(PetscFree2(gidxs,vals)); 1534 1535 /* assemble near null space */ 1536 for (i=0;i<maxneighs;i++) { 1537 PetscCall(VecAssemblyBegin(quad_vecs[i])); 1538 } 1539 for (i=0;i<maxneighs;i++) { 1540 PetscCall(VecAssemblyEnd(quad_vecs[i])); 1541 PetscCall(VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view")); 1542 PetscCall(VecLockReadPush(quad_vecs[i])); 1543 } 1544 PetscCall(VecDestroyVecs(maxneighs,&quad_vecs)); 1545 PetscFunctionReturn(0); 1546 } 1547 1548 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1549 { 1550 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1551 1552 PetscFunctionBegin; 1553 if (primalv) { 1554 if (pcbddc->user_primal_vertices_local) { 1555 IS list[2], newp; 1556 1557 list[0] = primalv; 1558 list[1] = pcbddc->user_primal_vertices_local; 1559 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp)); 1560 PetscCall(ISSortRemoveDups(newp)); 1561 PetscCall(ISDestroy(&list[1])); 1562 pcbddc->user_primal_vertices_local = newp; 1563 } else { 1564 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primalv)); 1565 } 1566 } 1567 PetscFunctionReturn(0); 1568 } 1569 1570 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1571 { 1572 PetscInt f, *comp = (PetscInt *)ctx; 1573 1574 PetscFunctionBegin; 1575 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1576 PetscFunctionReturn(0); 1577 } 1578 1579 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1580 { 1581 Vec local,global; 1582 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1583 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1584 PetscBool monolithic = PETSC_FALSE; 1585 1586 PetscFunctionBegin; 1587 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC"); 1588 PetscCall(PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL)); 1589 PetscOptionsEnd(); 1590 /* need to convert from global to local topology information and remove references to information in global ordering */ 1591 PetscCall(MatCreateVecs(pc->pmat,&global,NULL)); 1592 PetscCall(MatCreateVecs(matis->A,&local,NULL)); 1593 PetscCall(VecBindToCPU(global,PETSC_TRUE)); 1594 PetscCall(VecBindToCPU(local,PETSC_TRUE)); 1595 if (monolithic) { /* just get block size to properly compute vertices */ 1596 if (pcbddc->vertex_size == 1) { 1597 PetscCall(MatGetBlockSize(pc->pmat,&pcbddc->vertex_size)); 1598 } 1599 goto boundary; 1600 } 1601 1602 if (pcbddc->user_provided_isfordofs) { 1603 if (pcbddc->n_ISForDofs) { 1604 PetscInt i; 1605 1606 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal)); 1607 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1608 PetscInt bs; 1609 1610 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i])); 1611 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i],&bs)); 1612 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1613 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1614 } 1615 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1616 pcbddc->n_ISForDofs = 0; 1617 PetscCall(PetscFree(pcbddc->ISForDofs)); 1618 } 1619 } else { 1620 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1621 DM dm; 1622 1623 PetscCall(MatGetDM(pc->pmat, &dm)); 1624 if (!dm) { 1625 PetscCall(PCGetDM(pc, &dm)); 1626 } 1627 if (dm) { 1628 IS *fields; 1629 PetscInt nf,i; 1630 1631 PetscCall(DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL)); 1632 PetscCall(PetscMalloc1(nf,&pcbddc->ISForDofsLocal)); 1633 for (i=0;i<nf;i++) { 1634 PetscInt bs; 1635 1636 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i])); 1637 PetscCall(ISGetBlockSize(fields[i],&bs)); 1638 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1639 PetscCall(ISDestroy(&fields[i])); 1640 } 1641 PetscCall(PetscFree(fields)); 1642 pcbddc->n_ISForDofsLocal = nf; 1643 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1644 PetscContainer c; 1645 1646 PetscCall(PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c)); 1647 if (c) { 1648 MatISLocalFields lf; 1649 PetscCall(PetscContainerGetPointer(c,(void**)&lf)); 1650 PetscCall(PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf)); 1651 } else { /* fallback, create the default fields if bs > 1 */ 1652 PetscInt i, n = matis->A->rmap->n; 1653 PetscCall(MatGetBlockSize(pc->pmat,&i)); 1654 if (i > 1) { 1655 pcbddc->n_ISForDofsLocal = i; 1656 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal)); 1657 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1658 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i])); 1659 } 1660 } 1661 } 1662 } 1663 } else { 1664 PetscInt i; 1665 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1666 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i])); 1667 } 1668 } 1669 } 1670 1671 boundary: 1672 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1673 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal)); 1674 } else if (pcbddc->DirichletBoundariesLocal) { 1675 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal)); 1676 } 1677 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1678 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal)); 1679 } else if (pcbddc->NeumannBoundariesLocal) { 1680 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal)); 1681 } 1682 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1683 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local)); 1684 } 1685 PetscCall(VecDestroy(&global)); 1686 PetscCall(VecDestroy(&local)); 1687 /* detect local disconnected subdomains if requested (use matis->A) */ 1688 if (pcbddc->detect_disconnected) { 1689 IS primalv = NULL; 1690 PetscInt i; 1691 PetscBool filter = pcbddc->detect_disconnected_filter; 1692 1693 for (i=0;i<pcbddc->n_local_subs;i++) { 1694 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1695 } 1696 PetscCall(PetscFree(pcbddc->local_subs)); 1697 PetscCall(PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv)); 1698 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,primalv)); 1699 PetscCall(ISDestroy(&primalv)); 1700 } 1701 /* early stage corner detection */ 1702 { 1703 DM dm; 1704 1705 PetscCall(MatGetDM(pc->pmat,&dm)); 1706 if (!dm) { 1707 PetscCall(PCGetDM(pc,&dm)); 1708 } 1709 if (dm) { 1710 PetscBool isda; 1711 1712 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda)); 1713 if (isda) { 1714 ISLocalToGlobalMapping l2l; 1715 IS corners; 1716 Mat lA; 1717 PetscBool gl,lo; 1718 1719 { 1720 Vec cvec; 1721 const PetscScalar *coords; 1722 PetscInt dof,n,cdim; 1723 PetscBool memc = PETSC_TRUE; 1724 1725 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1726 PetscCall(DMGetCoordinates(dm,&cvec)); 1727 PetscCall(VecGetLocalSize(cvec,&n)); 1728 PetscCall(VecGetBlockSize(cvec,&cdim)); 1729 n /= cdim; 1730 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1731 PetscCall(PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords)); 1732 PetscCall(VecGetArrayRead(cvec,&coords)); 1733 #if defined(PETSC_USE_COMPLEX) 1734 memc = PETSC_FALSE; 1735 #endif 1736 if (dof != 1) memc = PETSC_FALSE; 1737 if (memc) { 1738 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof)); 1739 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1740 PetscReal *bcoords = pcbddc->mat_graph->coords; 1741 PetscInt i, b, d; 1742 1743 for (i=0;i<n;i++) { 1744 for (b=0;b<dof;b++) { 1745 for (d=0;d<cdim;d++) { 1746 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1747 } 1748 } 1749 } 1750 } 1751 PetscCall(VecRestoreArrayRead(cvec,&coords)); 1752 pcbddc->mat_graph->cdim = cdim; 1753 pcbddc->mat_graph->cnloc = dof*n; 1754 pcbddc->mat_graph->cloc = PETSC_FALSE; 1755 } 1756 PetscCall(DMDAGetSubdomainCornersIS(dm,&corners)); 1757 PetscCall(MatISGetLocalMat(pc->pmat,&lA)); 1758 PetscCall(MatGetLocalToGlobalMapping(lA,&l2l,NULL)); 1759 PetscCall(MatISRestoreLocalMat(pc->pmat,&lA)); 1760 lo = (PetscBool)(l2l && corners); 1761 PetscCall(MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 1762 if (gl) { /* From PETSc's DMDA */ 1763 const PetscInt *idx; 1764 PetscInt dof,bs,*idxout,n; 1765 1766 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1767 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l,&bs)); 1768 PetscCall(ISGetLocalSize(corners,&n)); 1769 PetscCall(ISGetIndices(corners,&idx)); 1770 if (bs == dof) { 1771 PetscCall(PetscMalloc1(n,&idxout)); 1772 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout)); 1773 } else { /* the original DMDA local-to-local map have been modified */ 1774 PetscInt i,d; 1775 1776 PetscCall(PetscMalloc1(dof*n,&idxout)); 1777 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1778 PetscCall(ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout)); 1779 1780 bs = 1; 1781 n *= dof; 1782 } 1783 PetscCall(ISRestoreIndices(corners,&idx)); 1784 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1785 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners)); 1786 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,corners)); 1787 PetscCall(ISDestroy(&corners)); 1788 pcbddc->corner_selected = PETSC_TRUE; 1789 pcbddc->corner_selection = PETSC_TRUE; 1790 } 1791 if (corners) { 1792 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1793 } 1794 } 1795 } 1796 } 1797 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1798 DM dm; 1799 1800 PetscCall(MatGetDM(pc->pmat,&dm)); 1801 if (!dm) { 1802 PetscCall(PCGetDM(pc,&dm)); 1803 } 1804 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1805 Vec vcoords; 1806 PetscSection section; 1807 PetscReal *coords; 1808 PetscInt d,cdim,nl,nf,**ctxs; 1809 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1810 /* debug coordinates */ 1811 PetscViewer viewer; 1812 PetscBool flg; 1813 PetscViewerFormat format; 1814 const char *prefix; 1815 1816 PetscCall(DMGetCoordinateDim(dm,&cdim)); 1817 PetscCall(DMGetLocalSection(dm,§ion)); 1818 PetscCall(PetscSectionGetNumFields(section,&nf)); 1819 PetscCall(DMCreateGlobalVector(dm,&vcoords)); 1820 PetscCall(VecGetLocalSize(vcoords,&nl)); 1821 PetscCall(PetscMalloc1(nl*cdim,&coords)); 1822 PetscCall(PetscMalloc2(nf,&funcs,nf,&ctxs)); 1823 PetscCall(PetscMalloc1(nf,&ctxs[0])); 1824 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1825 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1826 1827 /* debug coordinates */ 1828 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 1829 PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords),((PetscObject)vcoords)->options,prefix,"-pc_bddc_coords_vec_view",&viewer,&format,&flg)); 1830 if (flg) PetscCall(PetscViewerPushFormat(viewer,format)); 1831 for (d=0;d<cdim;d++) { 1832 PetscInt i; 1833 const PetscScalar *v; 1834 char name[16]; 1835 1836 for (i=0;i<nf;i++) ctxs[i][0] = d; 1837 PetscCall(PetscSNPrintf(name,sizeof(name),"bddc_coords_%d",(int)d)); 1838 PetscCall(PetscObjectSetName((PetscObject)vcoords,name)); 1839 PetscCall(DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords)); 1840 if (flg) PetscCall(VecView(vcoords,viewer)); 1841 PetscCall(VecGetArrayRead(vcoords,&v)); 1842 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1843 PetscCall(VecRestoreArrayRead(vcoords,&v)); 1844 } 1845 PetscCall(VecDestroy(&vcoords)); 1846 PetscCall(PCSetCoordinates(pc,cdim,nl,coords)); 1847 PetscCall(PetscFree(coords)); 1848 PetscCall(PetscFree(ctxs[0])); 1849 PetscCall(PetscFree2(funcs,ctxs)); 1850 if (flg) { 1851 PetscCall(PetscViewerPopFormat(viewer)); 1852 PetscCall(PetscViewerDestroy(&viewer)); 1853 } 1854 } 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1860 { 1861 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1862 IS nis; 1863 const PetscInt *idxs; 1864 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1865 1866 PetscFunctionBegin; 1867 PetscCheck(mop == MPI_LAND || mop == MPI_LOR,PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1868 if (mop == MPI_LAND) { 1869 /* init rootdata with true */ 1870 for (i=0;i<pc->pmat->rmap->n;i++) matis->sf_rootdata[i] = 1; 1871 } else { 1872 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 1873 } 1874 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 1875 PetscCall(ISGetLocalSize(*is,&nd)); 1876 PetscCall(ISGetIndices(*is,&idxs)); 1877 for (i=0;i<nd;i++) 1878 if (-1 < idxs[i] && idxs[i] < n) 1879 matis->sf_leafdata[idxs[i]] = 1; 1880 PetscCall(ISRestoreIndices(*is,&idxs)); 1881 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1882 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1883 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1884 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1885 if (mop == MPI_LAND) { 1886 PetscCall(PetscMalloc1(nd,&nidxs)); 1887 } else { 1888 PetscCall(PetscMalloc1(n,&nidxs)); 1889 } 1890 for (i=0,nnd=0;i<n;i++) 1891 if (matis->sf_leafdata[i]) 1892 nidxs[nnd++] = i; 1893 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis)); 1894 PetscCall(ISDestroy(is)); 1895 *is = nis; 1896 PetscFunctionReturn(0); 1897 } 1898 1899 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1900 { 1901 PC_IS *pcis = (PC_IS*)(pc->data); 1902 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1903 1904 PetscFunctionBegin; 1905 if (!pcbddc->benign_have_null) { 1906 PetscFunctionReturn(0); 1907 } 1908 if (pcbddc->ChangeOfBasisMatrix) { 1909 Vec swap; 1910 1911 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change)); 1912 swap = pcbddc->work_change; 1913 pcbddc->work_change = r; 1914 r = swap; 1915 } 1916 PetscCall(VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1917 PetscCall(VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1918 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1919 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D)); 1920 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1921 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 1922 PetscCall(VecSet(z,0.)); 1923 PetscCall(VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1924 PetscCall(VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1925 if (pcbddc->ChangeOfBasisMatrix) { 1926 pcbddc->work_change = r; 1927 PetscCall(VecCopy(z,pcbddc->work_change)); 1928 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z)); 1929 } 1930 PetscFunctionReturn(0); 1931 } 1932 1933 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1934 { 1935 PCBDDCBenignMatMult_ctx ctx; 1936 PetscBool apply_right,apply_left,reset_x; 1937 1938 PetscFunctionBegin; 1939 PetscCall(MatShellGetContext(A,&ctx)); 1940 if (transpose) { 1941 apply_right = ctx->apply_left; 1942 apply_left = ctx->apply_right; 1943 } else { 1944 apply_right = ctx->apply_right; 1945 apply_left = ctx->apply_left; 1946 } 1947 reset_x = PETSC_FALSE; 1948 if (apply_right) { 1949 const PetscScalar *ax; 1950 PetscInt nl,i; 1951 1952 PetscCall(VecGetLocalSize(x,&nl)); 1953 PetscCall(VecGetArrayRead(x,&ax)); 1954 PetscCall(PetscArraycpy(ctx->work,ax,nl)); 1955 PetscCall(VecRestoreArrayRead(x,&ax)); 1956 for (i=0;i<ctx->benign_n;i++) { 1957 PetscScalar sum,val; 1958 const PetscInt *idxs; 1959 PetscInt nz,j; 1960 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 1961 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1962 sum = 0.; 1963 if (ctx->apply_p0) { 1964 val = ctx->work[idxs[nz-1]]; 1965 for (j=0;j<nz-1;j++) { 1966 sum += ctx->work[idxs[j]]; 1967 ctx->work[idxs[j]] += val; 1968 } 1969 } else { 1970 for (j=0;j<nz-1;j++) { 1971 sum += ctx->work[idxs[j]]; 1972 } 1973 } 1974 ctx->work[idxs[nz-1]] -= sum; 1975 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1976 } 1977 PetscCall(VecPlaceArray(x,ctx->work)); 1978 reset_x = PETSC_TRUE; 1979 } 1980 if (transpose) { 1981 PetscCall(MatMultTranspose(ctx->A,x,y)); 1982 } else { 1983 PetscCall(MatMult(ctx->A,x,y)); 1984 } 1985 if (reset_x) PetscCall(VecResetArray(x)); 1986 if (apply_left) { 1987 PetscScalar *ay; 1988 PetscInt i; 1989 1990 PetscCall(VecGetArray(y,&ay)); 1991 for (i=0;i<ctx->benign_n;i++) { 1992 PetscScalar sum,val; 1993 const PetscInt *idxs; 1994 PetscInt nz,j; 1995 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 1996 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1997 val = -ay[idxs[nz-1]]; 1998 if (ctx->apply_p0) { 1999 sum = 0.; 2000 for (j=0;j<nz-1;j++) { 2001 sum += ay[idxs[j]]; 2002 ay[idxs[j]] += val; 2003 } 2004 ay[idxs[nz-1]] += sum; 2005 } else { 2006 for (j=0;j<nz-1;j++) { 2007 ay[idxs[j]] += val; 2008 } 2009 ay[idxs[nz-1]] = 0.; 2010 } 2011 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 2012 } 2013 PetscCall(VecRestoreArray(y,&ay)); 2014 } 2015 PetscFunctionReturn(0); 2016 } 2017 2018 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2019 { 2020 PetscFunctionBegin; 2021 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE)); 2022 PetscFunctionReturn(0); 2023 } 2024 2025 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2026 { 2027 PetscFunctionBegin; 2028 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE)); 2029 PetscFunctionReturn(0); 2030 } 2031 2032 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2033 { 2034 PC_IS *pcis = (PC_IS*)pc->data; 2035 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2036 PCBDDCBenignMatMult_ctx ctx; 2037 2038 PetscFunctionBegin; 2039 if (!restore) { 2040 Mat A_IB,A_BI; 2041 PetscScalar *work; 2042 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2043 2044 PetscCheck(!pcbddc->benign_original_mat,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2045 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2046 PetscCall(PetscMalloc1(pcis->n,&work)); 2047 PetscCall(MatCreate(PETSC_COMM_SELF,&A_IB)); 2048 PetscCall(MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE)); 2049 PetscCall(MatSetType(A_IB,MATSHELL)); 2050 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private)); 2051 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2052 PetscCall(PetscNew(&ctx)); 2053 PetscCall(MatShellSetContext(A_IB,ctx)); 2054 ctx->apply_left = PETSC_TRUE; 2055 ctx->apply_right = PETSC_FALSE; 2056 ctx->apply_p0 = PETSC_FALSE; 2057 ctx->benign_n = pcbddc->benign_n; 2058 if (reuse) { 2059 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2060 ctx->free = PETSC_FALSE; 2061 } else { /* TODO: could be optimized for successive solves */ 2062 ISLocalToGlobalMapping N_to_D; 2063 PetscInt i; 2064 2065 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D)); 2066 PetscCall(PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs)); 2067 for (i=0;i<pcbddc->benign_n;i++) { 2068 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i])); 2069 } 2070 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2071 ctx->free = PETSC_TRUE; 2072 } 2073 ctx->A = pcis->A_IB; 2074 ctx->work = work; 2075 PetscCall(MatSetUp(A_IB)); 2076 PetscCall(MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY)); 2077 PetscCall(MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY)); 2078 pcis->A_IB = A_IB; 2079 2080 /* A_BI as A_IB^T */ 2081 PetscCall(MatCreateTranspose(A_IB,&A_BI)); 2082 pcbddc->benign_original_mat = pcis->A_BI; 2083 pcis->A_BI = A_BI; 2084 } else { 2085 if (!pcbddc->benign_original_mat) { 2086 PetscFunctionReturn(0); 2087 } 2088 PetscCall(MatShellGetContext(pcis->A_IB,&ctx)); 2089 PetscCall(MatDestroy(&pcis->A_IB)); 2090 pcis->A_IB = ctx->A; 2091 ctx->A = NULL; 2092 PetscCall(MatDestroy(&pcis->A_BI)); 2093 pcis->A_BI = pcbddc->benign_original_mat; 2094 pcbddc->benign_original_mat = NULL; 2095 if (ctx->free) { 2096 PetscInt i; 2097 for (i=0;i<ctx->benign_n;i++) { 2098 PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2099 } 2100 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2101 } 2102 PetscCall(PetscFree(ctx->work)); 2103 PetscCall(PetscFree(ctx)); 2104 } 2105 PetscFunctionReturn(0); 2106 } 2107 2108 /* used just in bddc debug mode */ 2109 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2110 { 2111 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2112 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2113 Mat An; 2114 2115 PetscFunctionBegin; 2116 PetscCall(MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An)); 2117 PetscCall(MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL)); 2118 if (is1) { 2119 PetscCall(MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B)); 2120 PetscCall(MatDestroy(&An)); 2121 } else { 2122 *B = An; 2123 } 2124 PetscFunctionReturn(0); 2125 } 2126 2127 /* TODO: add reuse flag */ 2128 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2129 { 2130 Mat Bt; 2131 PetscScalar *a,*bdata; 2132 const PetscInt *ii,*ij; 2133 PetscInt m,n,i,nnz,*bii,*bij; 2134 PetscBool flg_row; 2135 2136 PetscFunctionBegin; 2137 PetscCall(MatGetSize(A,&n,&m)); 2138 PetscCall(MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2139 PetscCall(MatSeqAIJGetArray(A,&a)); 2140 nnz = n; 2141 for (i=0;i<ii[n];i++) { 2142 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2143 } 2144 PetscCall(PetscMalloc1(n+1,&bii)); 2145 PetscCall(PetscMalloc1(nnz,&bij)); 2146 PetscCall(PetscMalloc1(nnz,&bdata)); 2147 nnz = 0; 2148 bii[0] = 0; 2149 for (i=0;i<n;i++) { 2150 PetscInt j; 2151 for (j=ii[i];j<ii[i+1];j++) { 2152 PetscScalar entry = a[j]; 2153 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2154 bij[nnz] = ij[j]; 2155 bdata[nnz] = entry; 2156 nnz++; 2157 } 2158 } 2159 bii[i+1] = nnz; 2160 } 2161 PetscCall(MatSeqAIJRestoreArray(A,&a)); 2162 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt)); 2163 PetscCall(MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2164 { 2165 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2166 b->free_a = PETSC_TRUE; 2167 b->free_ij = PETSC_TRUE; 2168 } 2169 if (*B == A) { 2170 PetscCall(MatDestroy(&A)); 2171 } 2172 *B = Bt; 2173 PetscFunctionReturn(0); 2174 } 2175 2176 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2177 { 2178 Mat B = NULL; 2179 DM dm; 2180 IS is_dummy,*cc_n; 2181 ISLocalToGlobalMapping l2gmap_dummy; 2182 PCBDDCGraph graph; 2183 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2184 PetscInt i,n; 2185 PetscInt *xadj,*adjncy; 2186 PetscBool isplex = PETSC_FALSE; 2187 2188 PetscFunctionBegin; 2189 if (ncc) *ncc = 0; 2190 if (cc) *cc = NULL; 2191 if (primalv) *primalv = NULL; 2192 PetscCall(PCBDDCGraphCreate(&graph)); 2193 PetscCall(MatGetDM(pc->pmat,&dm)); 2194 if (!dm) { 2195 PetscCall(PCGetDM(pc,&dm)); 2196 } 2197 if (dm) { 2198 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex)); 2199 } 2200 if (filter) isplex = PETSC_FALSE; 2201 2202 if (isplex) { /* this code has been modified from plexpartition.c */ 2203 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2204 PetscInt *adj = NULL; 2205 IS cellNumbering; 2206 const PetscInt *cellNum; 2207 PetscBool useCone, useClosure; 2208 PetscSection section; 2209 PetscSegBuffer adjBuffer; 2210 PetscSF sfPoint; 2211 2212 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2213 PetscCall(DMGetPointSF(dm, &sfPoint)); 2214 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2215 /* Build adjacency graph via a section/segbuffer */ 2216 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion)); 2217 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2218 PetscCall(PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer)); 2219 /* Always use FVM adjacency to create partitioner graph */ 2220 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2221 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2222 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2223 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2224 for (n = 0, p = pStart; p < pEnd; p++) { 2225 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2226 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2227 adjSize = PETSC_DETERMINE; 2228 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2229 for (a = 0; a < adjSize; ++a) { 2230 const PetscInt point = adj[a]; 2231 if (pStart <= point && point < pEnd) { 2232 PetscInt *PETSC_RESTRICT pBuf; 2233 PetscCall(PetscSectionAddDof(section, p, 1)); 2234 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2235 *pBuf = point; 2236 } 2237 } 2238 n++; 2239 } 2240 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2241 /* Derive CSR graph from section/segbuffer */ 2242 PetscCall(PetscSectionSetUp(section)); 2243 PetscCall(PetscSectionGetStorageSize(section, &size)); 2244 PetscCall(PetscMalloc1(n+1, &xadj)); 2245 for (idx = 0, p = pStart; p < pEnd; p++) { 2246 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2247 PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++]))); 2248 } 2249 xadj[n] = size; 2250 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2251 /* Clean up */ 2252 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2253 PetscCall(PetscSectionDestroy(§ion)); 2254 PetscCall(PetscFree(adj)); 2255 graph->xadj = xadj; 2256 graph->adjncy = adjncy; 2257 } else { 2258 Mat A; 2259 PetscBool isseqaij, flg_row; 2260 2261 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2262 if (!A->rmap->N || !A->cmap->N) { 2263 PetscCall(PCBDDCGraphDestroy(&graph)); 2264 PetscFunctionReturn(0); 2265 } 2266 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij)); 2267 if (!isseqaij && filter) { 2268 PetscBool isseqdense; 2269 2270 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense)); 2271 if (!isseqdense) { 2272 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 2273 } else { /* TODO: rectangular case and LDA */ 2274 PetscScalar *array; 2275 PetscReal chop=1.e-6; 2276 2277 PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 2278 PetscCall(MatDenseGetArray(B,&array)); 2279 PetscCall(MatGetSize(B,&n,NULL)); 2280 for (i=0;i<n;i++) { 2281 PetscInt j; 2282 for (j=i+1;j<n;j++) { 2283 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2284 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2285 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2286 } 2287 } 2288 PetscCall(MatDenseRestoreArray(B,&array)); 2289 PetscCall(MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B)); 2290 } 2291 } else { 2292 PetscCall(PetscObjectReference((PetscObject)A)); 2293 B = A; 2294 } 2295 PetscCall(MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2296 2297 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2298 if (filter) { 2299 PetscScalar *data; 2300 PetscInt j,cum; 2301 2302 PetscCall(PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered)); 2303 PetscCall(MatSeqAIJGetArray(B,&data)); 2304 cum = 0; 2305 for (i=0;i<n;i++) { 2306 PetscInt t; 2307 2308 for (j=xadj[i];j<xadj[i+1];j++) { 2309 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2310 continue; 2311 } 2312 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2313 } 2314 t = xadj_filtered[i]; 2315 xadj_filtered[i] = cum; 2316 cum += t; 2317 } 2318 PetscCall(MatSeqAIJRestoreArray(B,&data)); 2319 graph->xadj = xadj_filtered; 2320 graph->adjncy = adjncy_filtered; 2321 } else { 2322 graph->xadj = xadj; 2323 graph->adjncy = adjncy; 2324 } 2325 } 2326 /* compute local connected components using PCBDDCGraph */ 2327 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy)); 2328 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy)); 2329 PetscCall(ISDestroy(&is_dummy)); 2330 PetscCall(PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT)); 2331 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2332 PetscCall(PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL)); 2333 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2334 2335 /* partial clean up */ 2336 PetscCall(PetscFree2(xadj_filtered,adjncy_filtered)); 2337 if (B) { 2338 PetscBool flg_row; 2339 PetscCall(MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2340 PetscCall(MatDestroy(&B)); 2341 } 2342 if (isplex) { 2343 PetscCall(PetscFree(xadj)); 2344 PetscCall(PetscFree(adjncy)); 2345 } 2346 2347 /* get back data */ 2348 if (isplex) { 2349 if (ncc) *ncc = graph->ncc; 2350 if (cc || primalv) { 2351 Mat A; 2352 PetscBT btv,btvt; 2353 PetscSection subSection; 2354 PetscInt *ids,cum,cump,*cids,*pids; 2355 2356 PetscCall(DMPlexGetSubdomainSection(dm,&subSection)); 2357 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2358 PetscCall(PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids)); 2359 PetscCall(PetscBTCreate(A->rmap->n,&btv)); 2360 PetscCall(PetscBTCreate(A->rmap->n,&btvt)); 2361 2362 cids[0] = 0; 2363 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2364 PetscInt j; 2365 2366 PetscCall(PetscBTMemzero(A->rmap->n,btvt)); 2367 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2368 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2369 2370 PetscCall(DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2371 for (k = 0; k < 2*size; k += 2) { 2372 PetscInt s, pp, p = closure[k], off, dof, cdof; 2373 2374 PetscCall(PetscSectionGetConstraintDof(subSection,p,&cdof)); 2375 PetscCall(PetscSectionGetOffset(subSection,p,&off)); 2376 PetscCall(PetscSectionGetDof(subSection,p,&dof)); 2377 for (s = 0; s < dof-cdof; s++) { 2378 if (PetscBTLookupSet(btvt,off+s)) continue; 2379 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2380 else pids[cump++] = off+s; /* cross-vertex */ 2381 } 2382 PetscCall(DMPlexGetTreeParent(dm,p,&pp,NULL)); 2383 if (pp != p) { 2384 PetscCall(PetscSectionGetConstraintDof(subSection,pp,&cdof)); 2385 PetscCall(PetscSectionGetOffset(subSection,pp,&off)); 2386 PetscCall(PetscSectionGetDof(subSection,pp,&dof)); 2387 for (s = 0; s < dof-cdof; s++) { 2388 if (PetscBTLookupSet(btvt,off+s)) continue; 2389 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2390 else pids[cump++] = off+s; /* cross-vertex */ 2391 } 2392 } 2393 } 2394 PetscCall(DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2395 } 2396 cids[i+1] = cum; 2397 /* mark dofs as already assigned */ 2398 for (j = cids[i]; j < cids[i+1]; j++) { 2399 PetscCall(PetscBTSet(btv,ids[j])); 2400 } 2401 } 2402 if (cc) { 2403 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2404 for (i = 0; i < graph->ncc; i++) { 2405 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i])); 2406 } 2407 *cc = cc_n; 2408 } 2409 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv)); 2410 PetscCall(PetscFree3(ids,cids,pids)); 2411 PetscCall(PetscBTDestroy(&btv)); 2412 PetscCall(PetscBTDestroy(&btvt)); 2413 } 2414 } else { 2415 if (ncc) *ncc = graph->ncc; 2416 if (cc) { 2417 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2418 for (i=0;i<graph->ncc;i++) { 2419 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i])); 2420 } 2421 *cc = cc_n; 2422 } 2423 } 2424 /* clean up graph */ 2425 graph->xadj = NULL; 2426 graph->adjncy = NULL; 2427 PetscCall(PCBDDCGraphDestroy(&graph)); 2428 PetscFunctionReturn(0); 2429 } 2430 2431 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2432 { 2433 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2434 PC_IS* pcis = (PC_IS*)(pc->data); 2435 IS dirIS = NULL; 2436 PetscInt i; 2437 2438 PetscFunctionBegin; 2439 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS)); 2440 if (zerodiag) { 2441 Mat A; 2442 Vec vec3_N; 2443 PetscScalar *vals; 2444 const PetscInt *idxs; 2445 PetscInt nz,*count; 2446 2447 /* p0 */ 2448 PetscCall(VecSet(pcis->vec1_N,0.)); 2449 PetscCall(PetscMalloc1(pcis->n,&vals)); 2450 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2451 PetscCall(ISGetIndices(zerodiag,&idxs)); 2452 for (i=0;i<nz;i++) vals[i] = 1.; 2453 PetscCall(VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES)); 2454 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2455 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2456 /* v_I */ 2457 PetscCall(VecSetRandom(pcis->vec2_N,NULL)); 2458 for (i=0;i<nz;i++) vals[i] = 0.; 2459 PetscCall(VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES)); 2460 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2461 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2462 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2463 PetscCall(VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES)); 2464 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2465 if (dirIS) { 2466 PetscInt n; 2467 2468 PetscCall(ISGetLocalSize(dirIS,&n)); 2469 PetscCall(ISGetIndices(dirIS,&idxs)); 2470 for (i=0;i<n;i++) vals[i] = 0.; 2471 PetscCall(VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES)); 2472 PetscCall(ISRestoreIndices(dirIS,&idxs)); 2473 } 2474 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2475 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2476 PetscCall(VecDuplicate(pcis->vec1_N,&vec3_N)); 2477 PetscCall(VecSet(vec3_N,0.)); 2478 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2479 PetscCall(MatMult(A,pcis->vec1_N,vec3_N)); 2480 PetscCall(VecDot(vec3_N,pcis->vec2_N,&vals[0])); 2481 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",(double)PetscAbsScalar(vals[0])); 2482 PetscCall(PetscFree(vals)); 2483 PetscCall(VecDestroy(&vec3_N)); 2484 2485 /* there should not be any pressure dofs lying on the interface */ 2486 PetscCall(PetscCalloc1(pcis->n,&count)); 2487 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2488 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2489 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2490 PetscCall(ISGetIndices(zerodiag,&idxs)); 2491 for (i=0;i<nz;i++) PetscCheck(!count[idxs[i]],PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof",idxs[i]); 2492 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2493 PetscCall(PetscFree(count)); 2494 } 2495 PetscCall(ISDestroy(&dirIS)); 2496 2497 /* check PCBDDCBenignGetOrSetP0 */ 2498 PetscCall(VecSetRandom(pcis->vec1_global,NULL)); 2499 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2500 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE)); 2501 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2502 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE)); 2503 for (i=0;i<pcbddc->benign_n;i++) { 2504 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2505 PetscCheck(val == -PetscGlobalRank-i,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g",(double)PetscRealPart(pcbddc->benign_p0[i]),i,(double)(-PetscGlobalRank-i)); 2506 } 2507 PetscFunctionReturn(0); 2508 } 2509 2510 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2511 { 2512 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2513 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2514 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2515 PetscInt nz,n,benign_n,bsp = 1; 2516 PetscInt *interior_dofs,n_interior_dofs,nneu; 2517 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2518 2519 PetscFunctionBegin; 2520 if (reuse) goto project_b0; 2521 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2522 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2523 for (n=0;n<pcbddc->benign_n;n++) { 2524 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2525 } 2526 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2527 has_null_pressures = PETSC_TRUE; 2528 have_null = PETSC_TRUE; 2529 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2530 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2531 Checks if all the pressure dofs in each subdomain have a zero diagonal 2532 If not, a change of basis on pressures is not needed 2533 since the local Schur complements are already SPD 2534 */ 2535 if (pcbddc->n_ISForDofsLocal) { 2536 IS iP = NULL; 2537 PetscInt p,*pp; 2538 PetscBool flg; 2539 2540 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp)); 2541 n = pcbddc->n_ISForDofsLocal; 2542 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC"); 2543 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg)); 2544 PetscOptionsEnd(); 2545 if (!flg) { 2546 n = 1; 2547 pp[0] = pcbddc->n_ISForDofsLocal-1; 2548 } 2549 2550 bsp = 0; 2551 for (p=0;p<n;p++) { 2552 PetscInt bs; 2553 2554 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal,PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %" PetscInt_FMT,pp[p]); 2555 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2556 bsp += bs; 2557 } 2558 PetscCall(PetscMalloc1(bsp,&bzerodiag)); 2559 bsp = 0; 2560 for (p=0;p<n;p++) { 2561 const PetscInt *idxs; 2562 PetscInt b,bs,npl,*bidxs; 2563 2564 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2565 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl)); 2566 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2567 PetscCall(PetscMalloc1(npl/bs,&bidxs)); 2568 for (b=0;b<bs;b++) { 2569 PetscInt i; 2570 2571 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2572 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp])); 2573 bsp++; 2574 } 2575 PetscCall(PetscFree(bidxs)); 2576 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2577 } 2578 PetscCall(ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures)); 2579 2580 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2581 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP)); 2582 if (iP) { 2583 IS newpressures; 2584 2585 PetscCall(ISDifference(pressures,iP,&newpressures)); 2586 PetscCall(ISDestroy(&pressures)); 2587 pressures = newpressures; 2588 } 2589 PetscCall(ISSorted(pressures,&sorted)); 2590 if (!sorted) { 2591 PetscCall(ISSort(pressures)); 2592 } 2593 PetscCall(PetscFree(pp)); 2594 } 2595 2596 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2597 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2598 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2599 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag)); 2600 PetscCall(ISSorted(zerodiag,&sorted)); 2601 if (!sorted) { 2602 PetscCall(ISSort(zerodiag)); 2603 } 2604 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2605 zerodiag_save = zerodiag; 2606 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2607 if (!nz) { 2608 if (n) have_null = PETSC_FALSE; 2609 has_null_pressures = PETSC_FALSE; 2610 PetscCall(ISDestroy(&zerodiag)); 2611 } 2612 recompute_zerodiag = PETSC_FALSE; 2613 2614 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2615 zerodiag_subs = NULL; 2616 benign_n = 0; 2617 n_interior_dofs = 0; 2618 interior_dofs = NULL; 2619 nneu = 0; 2620 if (pcbddc->NeumannBoundariesLocal) { 2621 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu)); 2622 } 2623 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2624 if (checkb) { /* need to compute interior nodes */ 2625 PetscInt n,i,j; 2626 PetscInt n_neigh,*neigh,*n_shared,**shared; 2627 PetscInt *iwork; 2628 2629 PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping,&n)); 2630 PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2631 PetscCall(PetscCalloc1(n,&iwork)); 2632 PetscCall(PetscMalloc1(n,&interior_dofs)); 2633 for (i=1;i<n_neigh;i++) 2634 for (j=0;j<n_shared[i];j++) 2635 iwork[shared[i][j]] += 1; 2636 for (i=0;i<n;i++) 2637 if (!iwork[i]) 2638 interior_dofs[n_interior_dofs++] = i; 2639 PetscCall(PetscFree(iwork)); 2640 PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2641 } 2642 if (has_null_pressures) { 2643 IS *subs; 2644 PetscInt nsubs,i,j,nl; 2645 const PetscInt *idxs; 2646 PetscScalar *array; 2647 Vec *work; 2648 2649 subs = pcbddc->local_subs; 2650 nsubs = pcbddc->n_local_subs; 2651 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2652 if (checkb) { 2653 PetscCall(VecDuplicateVecs(matis->y,2,&work)); 2654 PetscCall(ISGetLocalSize(zerodiag,&nl)); 2655 PetscCall(ISGetIndices(zerodiag,&idxs)); 2656 /* work[0] = 1_p */ 2657 PetscCall(VecSet(work[0],0.)); 2658 PetscCall(VecGetArray(work[0],&array)); 2659 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2660 PetscCall(VecRestoreArray(work[0],&array)); 2661 /* work[0] = 1_v */ 2662 PetscCall(VecSet(work[1],1.)); 2663 PetscCall(VecGetArray(work[1],&array)); 2664 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2665 PetscCall(VecRestoreArray(work[1],&array)); 2666 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2667 } 2668 2669 if (nsubs > 1 || bsp > 1) { 2670 IS *is; 2671 PetscInt b,totb; 2672 2673 totb = bsp; 2674 is = bsp > 1 ? bzerodiag : &zerodiag; 2675 nsubs = PetscMax(nsubs,1); 2676 PetscCall(PetscCalloc1(nsubs*totb,&zerodiag_subs)); 2677 for (b=0;b<totb;b++) { 2678 for (i=0;i<nsubs;i++) { 2679 ISLocalToGlobalMapping l2g; 2680 IS t_zerodiag_subs; 2681 PetscInt nl; 2682 2683 if (subs) { 2684 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i],&l2g)); 2685 } else { 2686 IS tis; 2687 2688 PetscCall(MatGetLocalSize(pcbddc->local_mat,&nl,NULL)); 2689 PetscCall(ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis)); 2690 PetscCall(ISLocalToGlobalMappingCreateIS(tis,&l2g)); 2691 PetscCall(ISDestroy(&tis)); 2692 } 2693 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs)); 2694 PetscCall(ISGetLocalSize(t_zerodiag_subs,&nl)); 2695 if (nl) { 2696 PetscBool valid = PETSC_TRUE; 2697 2698 if (checkb) { 2699 PetscCall(VecSet(matis->x,0)); 2700 PetscCall(ISGetLocalSize(subs[i],&nl)); 2701 PetscCall(ISGetIndices(subs[i],&idxs)); 2702 PetscCall(VecGetArray(matis->x,&array)); 2703 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2704 PetscCall(VecRestoreArray(matis->x,&array)); 2705 PetscCall(ISRestoreIndices(subs[i],&idxs)); 2706 PetscCall(VecPointwiseMult(matis->x,work[0],matis->x)); 2707 PetscCall(MatMult(matis->A,matis->x,matis->y)); 2708 PetscCall(VecPointwiseMult(matis->y,work[1],matis->y)); 2709 PetscCall(VecGetArray(matis->y,&array)); 2710 for (j=0;j<n_interior_dofs;j++) { 2711 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2712 valid = PETSC_FALSE; 2713 break; 2714 } 2715 } 2716 PetscCall(VecRestoreArray(matis->y,&array)); 2717 } 2718 if (valid && nneu) { 2719 const PetscInt *idxs; 2720 PetscInt nzb; 2721 2722 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2723 PetscCall(ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL)); 2724 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2725 if (nzb) valid = PETSC_FALSE; 2726 } 2727 if (valid && pressures) { 2728 IS t_pressure_subs,tmp; 2729 PetscInt i1,i2; 2730 2731 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs)); 2732 PetscCall(ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp)); 2733 PetscCall(ISGetLocalSize(tmp,&i1)); 2734 PetscCall(ISGetLocalSize(t_zerodiag_subs,&i2)); 2735 if (i2 != i1) valid = PETSC_FALSE; 2736 PetscCall(ISDestroy(&t_pressure_subs)); 2737 PetscCall(ISDestroy(&tmp)); 2738 } 2739 if (valid) { 2740 PetscCall(ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n])); 2741 benign_n++; 2742 } else recompute_zerodiag = PETSC_TRUE; 2743 } 2744 PetscCall(ISDestroy(&t_zerodiag_subs)); 2745 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2746 } 2747 } 2748 } else { /* there's just one subdomain (or zero if they have not been detected */ 2749 PetscBool valid = PETSC_TRUE; 2750 2751 if (nneu) valid = PETSC_FALSE; 2752 if (valid && pressures) { 2753 PetscCall(ISEqual(pressures,zerodiag,&valid)); 2754 } 2755 if (valid && checkb) { 2756 PetscCall(MatMult(matis->A,work[0],matis->x)); 2757 PetscCall(VecPointwiseMult(matis->x,work[1],matis->x)); 2758 PetscCall(VecGetArray(matis->x,&array)); 2759 for (j=0;j<n_interior_dofs;j++) { 2760 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2761 valid = PETSC_FALSE; 2762 break; 2763 } 2764 } 2765 PetscCall(VecRestoreArray(matis->x,&array)); 2766 } 2767 if (valid) { 2768 benign_n = 1; 2769 PetscCall(PetscMalloc1(benign_n,&zerodiag_subs)); 2770 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2771 zerodiag_subs[0] = zerodiag; 2772 } 2773 } 2774 if (checkb) { 2775 PetscCall(VecDestroyVecs(2,&work)); 2776 } 2777 } 2778 PetscCall(PetscFree(interior_dofs)); 2779 2780 if (!benign_n) { 2781 PetscInt n; 2782 2783 PetscCall(ISDestroy(&zerodiag)); 2784 recompute_zerodiag = PETSC_FALSE; 2785 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2786 if (n) have_null = PETSC_FALSE; 2787 } 2788 2789 /* final check for null pressures */ 2790 if (zerodiag && pressures) { 2791 PetscCall(ISEqual(pressures,zerodiag,&have_null)); 2792 } 2793 2794 if (recompute_zerodiag) { 2795 PetscCall(ISDestroy(&zerodiag)); 2796 if (benign_n == 1) { 2797 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2798 zerodiag = zerodiag_subs[0]; 2799 } else { 2800 PetscInt i,nzn,*new_idxs; 2801 2802 nzn = 0; 2803 for (i=0;i<benign_n;i++) { 2804 PetscInt ns; 2805 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2806 nzn += ns; 2807 } 2808 PetscCall(PetscMalloc1(nzn,&new_idxs)); 2809 nzn = 0; 2810 for (i=0;i<benign_n;i++) { 2811 PetscInt ns,*idxs; 2812 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2813 PetscCall(ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2814 PetscCall(PetscArraycpy(new_idxs+nzn,idxs,ns)); 2815 PetscCall(ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2816 nzn += ns; 2817 } 2818 PetscCall(PetscSortInt(nzn,new_idxs)); 2819 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag)); 2820 } 2821 have_null = PETSC_FALSE; 2822 } 2823 2824 /* determines if the coarse solver will be singular or not */ 2825 PetscCall(MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 2826 2827 /* Prepare matrix to compute no-net-flux */ 2828 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2829 Mat A,loc_divudotp; 2830 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2831 IS row,col,isused = NULL; 2832 PetscInt M,N,n,st,n_isused; 2833 2834 if (pressures) { 2835 isused = pressures; 2836 } else { 2837 isused = zerodiag_save; 2838 } 2839 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL)); 2840 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2841 PetscCall(MatGetLocalSize(A,&n,NULL)); 2842 PetscCheck(isused || (n == 0),PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2843 n_isused = 0; 2844 if (isused) { 2845 PetscCall(ISGetLocalSize(isused,&n_isused)); 2846 } 2847 PetscCallMPI(MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 2848 st = st-n_isused; 2849 if (n) { 2850 const PetscInt *gidxs; 2851 2852 PetscCall(MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp)); 2853 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 2854 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2855 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2856 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col)); 2857 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 2858 } else { 2859 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp)); 2860 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2861 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col)); 2862 } 2863 PetscCall(MatGetSize(pc->pmat,NULL,&N)); 2864 PetscCall(ISGetSize(row,&M)); 2865 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 2866 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 2867 PetscCall(ISDestroy(&row)); 2868 PetscCall(ISDestroy(&col)); 2869 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp)); 2870 PetscCall(MatSetType(pcbddc->divudotp,MATIS)); 2871 PetscCall(MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N)); 2872 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g)); 2873 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2874 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2875 PetscCall(MatISSetLocalMat(pcbddc->divudotp,loc_divudotp)); 2876 PetscCall(MatDestroy(&loc_divudotp)); 2877 PetscCall(MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2878 PetscCall(MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2879 } 2880 PetscCall(ISDestroy(&zerodiag_save)); 2881 PetscCall(ISDestroy(&pressures)); 2882 if (bzerodiag) { 2883 PetscInt i; 2884 2885 for (i=0;i<bsp;i++) { 2886 PetscCall(ISDestroy(&bzerodiag[i])); 2887 } 2888 PetscCall(PetscFree(bzerodiag)); 2889 } 2890 pcbddc->benign_n = benign_n; 2891 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2892 2893 /* determines if the problem has subdomains with 0 pressure block */ 2894 have_null = (PetscBool)(!!pcbddc->benign_n); 2895 PetscCall(MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 2896 2897 project_b0: 2898 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2899 /* change of basis and p0 dofs */ 2900 if (pcbddc->benign_n) { 2901 PetscInt i,s,*nnz; 2902 2903 /* local change of basis for pressures */ 2904 PetscCall(MatDestroy(&pcbddc->benign_change)); 2905 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change)); 2906 PetscCall(MatSetType(pcbddc->benign_change,MATAIJ)); 2907 PetscCall(MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE)); 2908 PetscCall(PetscMalloc1(n,&nnz)); 2909 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2910 for (i=0;i<pcbddc->benign_n;i++) { 2911 const PetscInt *idxs; 2912 PetscInt nzs,j; 2913 2914 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs)); 2915 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2916 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2917 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2918 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2919 } 2920 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz)); 2921 PetscCall(MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 2922 PetscCall(PetscFree(nnz)); 2923 /* set identity by default */ 2924 for (i=0;i<n;i++) { 2925 PetscCall(MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES)); 2926 } 2927 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 2928 PetscCall(PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0)); 2929 /* set change on pressures */ 2930 for (s=0;s<pcbddc->benign_n;s++) { 2931 PetscScalar *array; 2932 const PetscInt *idxs; 2933 PetscInt nzs; 2934 2935 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs)); 2936 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2937 for (i=0;i<nzs-1;i++) { 2938 PetscScalar vals[2]; 2939 PetscInt cols[2]; 2940 2941 cols[0] = idxs[i]; 2942 cols[1] = idxs[nzs-1]; 2943 vals[0] = 1.; 2944 vals[1] = 1.; 2945 PetscCall(MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES)); 2946 } 2947 PetscCall(PetscMalloc1(nzs,&array)); 2948 for (i=0;i<nzs-1;i++) array[i] = -1.; 2949 array[nzs-1] = 1.; 2950 PetscCall(MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES)); 2951 /* store local idxs for p0 */ 2952 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2953 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2954 PetscCall(PetscFree(array)); 2955 } 2956 PetscCall(MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2957 PetscCall(MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2958 2959 /* project if needed */ 2960 if (pcbddc->benign_change_explicit) { 2961 Mat M; 2962 2963 PetscCall(MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M)); 2964 PetscCall(MatDestroy(&pcbddc->local_mat)); 2965 PetscCall(MatSeqAIJCompress(M,&pcbddc->local_mat)); 2966 PetscCall(MatDestroy(&M)); 2967 } 2968 /* store global idxs for p0 */ 2969 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx)); 2970 } 2971 *zerodiaglocal = zerodiag; 2972 PetscFunctionReturn(0); 2973 } 2974 2975 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2976 { 2977 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2978 PetscScalar *array; 2979 2980 PetscFunctionBegin; 2981 if (!pcbddc->benign_sf) { 2982 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf)); 2983 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx)); 2984 } 2985 if (get) { 2986 PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array)); 2987 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 2988 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 2989 PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 2990 } else { 2991 PetscCall(VecGetArray(v,&array)); 2992 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 2993 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 2994 PetscCall(VecRestoreArray(v,&array)); 2995 } 2996 PetscFunctionReturn(0); 2997 } 2998 2999 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3000 { 3001 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3002 3003 PetscFunctionBegin; 3004 /* TODO: add error checking 3005 - avoid nested pop (or push) calls. 3006 - cannot push before pop. 3007 - cannot call this if pcbddc->local_mat is NULL 3008 */ 3009 if (!pcbddc->benign_n) { 3010 PetscFunctionReturn(0); 3011 } 3012 if (pop) { 3013 if (pcbddc->benign_change_explicit) { 3014 IS is_p0; 3015 MatReuse reuse; 3016 3017 /* extract B_0 */ 3018 reuse = MAT_INITIAL_MATRIX; 3019 if (pcbddc->benign_B0) { 3020 reuse = MAT_REUSE_MATRIX; 3021 } 3022 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0)); 3023 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0)); 3024 /* remove rows and cols from local problem */ 3025 PetscCall(MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE)); 3026 PetscCall(MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 3027 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL)); 3028 PetscCall(ISDestroy(&is_p0)); 3029 } else { 3030 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3031 PetscScalar *vals; 3032 PetscInt i,n,*idxs_ins; 3033 3034 PetscCall(VecGetLocalSize(matis->y,&n)); 3035 PetscCall(PetscMalloc2(n,&idxs_ins,n,&vals)); 3036 if (!pcbddc->benign_B0) { 3037 PetscInt *nnz; 3038 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0)); 3039 PetscCall(MatSetType(pcbddc->benign_B0,MATAIJ)); 3040 PetscCall(MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE)); 3041 PetscCall(PetscMalloc1(pcbddc->benign_n,&nnz)); 3042 for (i=0;i<pcbddc->benign_n;i++) { 3043 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i])); 3044 nnz[i] = n - nnz[i]; 3045 } 3046 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz)); 3047 PetscCall(MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 3048 PetscCall(PetscFree(nnz)); 3049 } 3050 3051 for (i=0;i<pcbddc->benign_n;i++) { 3052 PetscScalar *array; 3053 PetscInt *idxs,j,nz,cum; 3054 3055 PetscCall(VecSet(matis->x,0.)); 3056 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz)); 3057 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3058 for (j=0;j<nz;j++) vals[j] = 1.; 3059 PetscCall(VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES)); 3060 PetscCall(VecAssemblyBegin(matis->x)); 3061 PetscCall(VecAssemblyEnd(matis->x)); 3062 PetscCall(VecSet(matis->y,0.)); 3063 PetscCall(MatMult(matis->A,matis->x,matis->y)); 3064 PetscCall(VecGetArray(matis->y,&array)); 3065 cum = 0; 3066 for (j=0;j<n;j++) { 3067 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3068 vals[cum] = array[j]; 3069 idxs_ins[cum] = j; 3070 cum++; 3071 } 3072 } 3073 PetscCall(MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES)); 3074 PetscCall(VecRestoreArray(matis->y,&array)); 3075 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3076 } 3077 PetscCall(MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3078 PetscCall(MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3079 PetscCall(PetscFree2(idxs_ins,vals)); 3080 } 3081 } else { /* push */ 3082 if (pcbddc->benign_change_explicit) { 3083 PetscInt i; 3084 3085 for (i=0;i<pcbddc->benign_n;i++) { 3086 PetscScalar *B0_vals; 3087 PetscInt *B0_cols,B0_ncol; 3088 3089 PetscCall(MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3090 PetscCall(MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES)); 3091 PetscCall(MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES)); 3092 PetscCall(MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES)); 3093 PetscCall(MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3094 } 3095 PetscCall(MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3096 PetscCall(MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3097 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3098 } 3099 PetscFunctionReturn(0); 3100 } 3101 3102 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3103 { 3104 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3105 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3106 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3107 PetscBLASInt *B_iwork,*B_ifail; 3108 PetscScalar *work,lwork; 3109 PetscScalar *St,*S,*eigv; 3110 PetscScalar *Sarray,*Starray; 3111 PetscReal *eigs,thresh,lthresh,uthresh; 3112 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3113 PetscBool allocated_S_St,upart; 3114 #if defined(PETSC_USE_COMPLEX) 3115 PetscReal *rwork; 3116 #endif 3117 3118 PetscFunctionBegin; 3119 if (!pcbddc->adaptive_selection) PetscFunctionReturn(0); 3120 PetscCheck(sub_schurs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3121 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3122 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3123 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3124 3125 if (pcbddc->dbg_flag) { 3126 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3127 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3128 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 3129 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n")); 3130 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3131 } 3132 3133 if (pcbddc->dbg_flag) { 3134 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef)); 3135 } 3136 3137 /* max size of subsets */ 3138 mss = 0; 3139 for (i=0;i<sub_schurs->n_subs;i++) { 3140 PetscInt subset_size; 3141 3142 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3143 mss = PetscMax(mss,subset_size); 3144 } 3145 3146 /* min/max and threshold */ 3147 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3148 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3149 nmax = PetscMax(nmin,nmax); 3150 allocated_S_St = PETSC_FALSE; 3151 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3152 allocated_S_St = PETSC_TRUE; 3153 } 3154 3155 /* allocate lapack workspace */ 3156 cum = cum2 = 0; 3157 maxneigs = 0; 3158 for (i=0;i<sub_schurs->n_subs;i++) { 3159 PetscInt n,subset_size; 3160 3161 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3162 n = PetscMin(subset_size,nmax); 3163 cum += subset_size; 3164 cum2 += subset_size*n; 3165 maxneigs = PetscMax(maxneigs,n); 3166 } 3167 lwork = 0; 3168 if (mss) { 3169 if (sub_schurs->is_symmetric) { 3170 PetscScalar sdummy = 0.; 3171 PetscBLASInt B_itype = 1; 3172 PetscBLASInt B_N = mss, idummy = 0; 3173 PetscReal rdummy = 0.,zero = 0.0; 3174 PetscReal eps = 0.0; /* dlamch? */ 3175 3176 B_lwork = -1; 3177 /* some implementations may complain about NULL pointers, even if we are querying */ 3178 S = &sdummy; 3179 St = &sdummy; 3180 eigs = &rdummy; 3181 eigv = &sdummy; 3182 B_iwork = &idummy; 3183 B_ifail = &idummy; 3184 #if defined(PETSC_USE_COMPLEX) 3185 rwork = &rdummy; 3186 #endif 3187 thresh = 1.0; 3188 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3189 #if defined(PETSC_USE_COMPLEX) 3190 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3191 #else 3192 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3193 #endif 3194 PetscCheck(B_ierr == 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3195 PetscCall(PetscFPTrapPop()); 3196 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3197 } 3198 3199 nv = 0; 3200 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3201 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&nv)); 3202 } 3203 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork)); 3204 if (allocated_S_St) { 3205 PetscCall(PetscMalloc2(mss*mss,&S,mss*mss,&St)); 3206 } 3207 PetscCall(PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail)); 3208 #if defined(PETSC_USE_COMPLEX) 3209 PetscCall(PetscMalloc1(7*mss,&rwork)); 3210 #endif 3211 PetscCall(PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3212 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3213 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3214 nv+cum,&pcbddc->adaptive_constraints_idxs, 3215 nv+cum2,&pcbddc->adaptive_constraints_data)); 3216 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs)); 3217 3218 maxneigs = 0; 3219 cum = cumarray = 0; 3220 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3221 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3222 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3223 const PetscInt *idxs; 3224 3225 PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs)); 3226 for (cum=0;cum<nv;cum++) { 3227 pcbddc->adaptive_constraints_n[cum] = 1; 3228 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3229 pcbddc->adaptive_constraints_data[cum] = 1.0; 3230 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3231 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3232 } 3233 PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs)); 3234 } 3235 3236 if (mss) { /* multilevel */ 3237 if (sub_schurs->gdsw) { 3238 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray)); 3239 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3240 } else { 3241 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3242 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3243 } 3244 } 3245 3246 lthresh = pcbddc->adaptive_threshold[0]; 3247 uthresh = pcbddc->adaptive_threshold[1]; 3248 upart = pcbddc->use_deluxe_scaling; 3249 for (i=0;i<sub_schurs->n_subs;i++) { 3250 const PetscInt *idxs; 3251 PetscReal upper,lower; 3252 PetscInt j,subset_size,eigs_start = 0; 3253 PetscBLASInt B_N; 3254 PetscBool same_data = PETSC_FALSE; 3255 PetscBool scal = PETSC_FALSE; 3256 3257 if (upart) { 3258 upper = PETSC_MAX_REAL; 3259 lower = uthresh; 3260 } else { 3261 if (sub_schurs->gdsw) { 3262 upper = uthresh; 3263 lower = PETSC_MIN_REAL; 3264 } else { 3265 PetscCheck(sub_schurs->is_posdef,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3266 upper = 1./uthresh; 3267 lower = 0.; 3268 } 3269 } 3270 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3271 PetscCall(ISGetIndices(sub_schurs->is_subs[i],&idxs)); 3272 PetscCall(PetscBLASIntCast(subset_size,&B_N)); 3273 /* this is experimental: we assume the dofs have been properly grouped to have 3274 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3275 if (!sub_schurs->is_posdef) { 3276 Mat T; 3277 3278 for (j=0;j<subset_size;j++) { 3279 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3280 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T)); 3281 PetscCall(MatScale(T,-1.0)); 3282 PetscCall(MatDestroy(&T)); 3283 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T)); 3284 PetscCall(MatScale(T,-1.0)); 3285 PetscCall(MatDestroy(&T)); 3286 if (sub_schurs->change_primal_sub) { 3287 PetscInt nz,k; 3288 const PetscInt *idxs; 3289 3290 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz)); 3291 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i],&idxs)); 3292 for (k=0;k<nz;k++) { 3293 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3294 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3295 } 3296 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs)); 3297 } 3298 scal = PETSC_TRUE; 3299 break; 3300 } 3301 } 3302 } 3303 3304 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3305 if (sub_schurs->is_symmetric) { 3306 PetscInt j,k; 3307 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3308 PetscCall(PetscArrayzero(S,subset_size*subset_size)); 3309 PetscCall(PetscArrayzero(St,subset_size*subset_size)); 3310 } 3311 for (j=0;j<subset_size;j++) { 3312 for (k=j;k<subset_size;k++) { 3313 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3314 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3315 } 3316 } 3317 } else { 3318 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3319 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3320 } 3321 } else { 3322 S = Sarray + cumarray; 3323 St = Starray + cumarray; 3324 } 3325 /* see if we can save some work */ 3326 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3327 PetscCall(PetscArraycmp(S,St,subset_size*subset_size,&same_data)); 3328 } 3329 3330 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3331 B_neigs = 0; 3332 } else { 3333 if (sub_schurs->is_symmetric) { 3334 PetscBLASInt B_itype = 1; 3335 PetscBLASInt B_IL, B_IU; 3336 PetscReal eps = -1.0; /* dlamch? */ 3337 PetscInt nmin_s; 3338 PetscBool compute_range; 3339 3340 B_neigs = 0; 3341 compute_range = (PetscBool)!same_data; 3342 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3343 3344 if (pcbddc->dbg_flag) { 3345 PetscInt nc = 0; 3346 3347 if (sub_schurs->change_primal_sub) { 3348 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc)); 3349 } 3350 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc)); 3351 } 3352 3353 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3354 if (compute_range) { 3355 3356 /* ask for eigenvalues larger than thresh */ 3357 if (sub_schurs->is_posdef) { 3358 #if defined(PETSC_USE_COMPLEX) 3359 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3360 #else 3361 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3362 #endif 3363 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3364 } else { /* no theory so far, but it works nicely */ 3365 PetscInt recipe = 0,recipe_m = 1; 3366 PetscReal bb[2]; 3367 3368 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL)); 3369 switch (recipe) { 3370 case 0: 3371 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3372 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3373 #if defined(PETSC_USE_COMPLEX) 3374 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3375 #else 3376 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3377 #endif 3378 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3379 break; 3380 case 1: 3381 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3382 #if defined(PETSC_USE_COMPLEX) 3383 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3384 #else 3385 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3386 #endif 3387 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3388 if (!scal) { 3389 PetscBLASInt B_neigs2 = 0; 3390 3391 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3392 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3393 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3394 #if defined(PETSC_USE_COMPLEX) 3395 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3396 #else 3397 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3398 #endif 3399 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3400 B_neigs += B_neigs2; 3401 } 3402 break; 3403 case 2: 3404 if (scal) { 3405 bb[0] = PETSC_MIN_REAL; 3406 bb[1] = 0; 3407 #if defined(PETSC_USE_COMPLEX) 3408 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3409 #else 3410 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3411 #endif 3412 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3413 } else { 3414 PetscBLASInt B_neigs2 = 0; 3415 PetscBool import = PETSC_FALSE; 3416 3417 lthresh = PetscMax(lthresh,0.0); 3418 if (lthresh > 0.0) { 3419 bb[0] = PETSC_MIN_REAL; 3420 bb[1] = lthresh*lthresh; 3421 3422 import = PETSC_TRUE; 3423 #if defined(PETSC_USE_COMPLEX) 3424 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3425 #else 3426 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3427 #endif 3428 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3429 } 3430 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3431 bb[1] = PETSC_MAX_REAL; 3432 if (import) { 3433 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3434 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3435 } 3436 #if defined(PETSC_USE_COMPLEX) 3437 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3438 #else 3439 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3440 #endif 3441 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3442 B_neigs += B_neigs2; 3443 } 3444 break; 3445 case 3: 3446 if (scal) { 3447 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL)); 3448 } else { 3449 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL)); 3450 } 3451 if (!scal) { 3452 bb[0] = uthresh; 3453 bb[1] = PETSC_MAX_REAL; 3454 #if defined(PETSC_USE_COMPLEX) 3455 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3456 #else 3457 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3458 #endif 3459 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3460 } 3461 if (recipe_m > 0 && B_N - B_neigs > 0) { 3462 PetscBLASInt B_neigs2 = 0; 3463 3464 B_IL = 1; 3465 PetscCall(PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU)); 3466 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3467 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3468 #if defined(PETSC_USE_COMPLEX) 3469 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3470 #else 3471 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3472 #endif 3473 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3474 B_neigs += B_neigs2; 3475 } 3476 break; 3477 case 4: 3478 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3479 #if defined(PETSC_USE_COMPLEX) 3480 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3481 #else 3482 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3483 #endif 3484 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3485 { 3486 PetscBLASInt B_neigs2 = 0; 3487 3488 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3489 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3490 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3491 #if defined(PETSC_USE_COMPLEX) 3492 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3493 #else 3494 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3495 #endif 3496 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3497 B_neigs += B_neigs2; 3498 } 3499 break; 3500 case 5: /* same as before: first compute all eigenvalues, then filter */ 3501 #if defined(PETSC_USE_COMPLEX) 3502 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3503 #else 3504 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3505 #endif 3506 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3507 { 3508 PetscInt e,k,ne; 3509 for (e=0,ne=0;e<B_neigs;e++) { 3510 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3511 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3512 eigs[ne] = eigs[e]; 3513 ne++; 3514 } 3515 } 3516 PetscCall(PetscArraycpy(eigv,S,B_N*ne)); 3517 B_neigs = ne; 3518 } 3519 break; 3520 default: 3521 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %" PetscInt_FMT,recipe); 3522 } 3523 } 3524 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3525 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3526 B_IL = 1; 3527 #if defined(PETSC_USE_COMPLEX) 3528 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3529 #else 3530 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3531 #endif 3532 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3533 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3534 PetscInt k; 3535 PetscCheck(sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3536 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax)); 3537 PetscCall(PetscBLASIntCast(nmax,&B_neigs)); 3538 nmin = nmax; 3539 PetscCall(PetscArrayzero(eigv,subset_size*nmax)); 3540 for (k=0;k<nmax;k++) { 3541 eigs[k] = 1./PETSC_SMALL; 3542 eigv[k*(subset_size+1)] = 1.0; 3543 } 3544 } 3545 PetscCall(PetscFPTrapPop()); 3546 if (B_ierr) { 3547 PetscCheck(B_ierr >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT,-B_ierr); 3548 PetscCheck(B_ierr > B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge",B_ierr); 3549 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite",B_ierr-B_N-1); 3550 } 3551 3552 if (B_neigs > nmax) { 3553 if (pcbddc->dbg_flag) { 3554 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n",B_neigs,nmax)); 3555 } 3556 if (upart) eigs_start = scal ? 0 : B_neigs-nmax; 3557 B_neigs = nmax; 3558 } 3559 3560 nmin_s = PetscMin(nmin,B_N); 3561 if (B_neigs < nmin_s) { 3562 PetscBLASInt B_neigs2 = 0; 3563 3564 if (upart) { 3565 if (scal) { 3566 B_IU = nmin_s; 3567 B_IL = B_neigs + 1; 3568 } else { 3569 B_IL = B_N - nmin_s + 1; 3570 B_IU = B_N - B_neigs; 3571 } 3572 } else { 3573 B_IL = B_neigs + 1; 3574 B_IU = nmin_s; 3575 } 3576 if (pcbddc->dbg_flag) { 3577 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU)); 3578 } 3579 if (sub_schurs->is_symmetric) { 3580 PetscInt j,k; 3581 for (j=0;j<subset_size;j++) { 3582 for (k=j;k<subset_size;k++) { 3583 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3584 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3585 } 3586 } 3587 } else { 3588 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3589 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3590 } 3591 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3592 #if defined(PETSC_USE_COMPLEX) 3593 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3594 #else 3595 PetscCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3596 #endif 3597 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3598 PetscCall(PetscFPTrapPop()); 3599 B_neigs += B_neigs2; 3600 } 3601 if (B_ierr) { 3602 PetscCheck(B_ierr >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT,-B_ierr); 3603 PetscCheck(B_ierr > B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge",B_ierr); 3604 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite",B_ierr-B_N-1); 3605 } 3606 if (pcbddc->dbg_flag) { 3607 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %" PetscBLASInt_FMT " eigs\n",B_neigs)); 3608 for (j=0;j<B_neigs;j++) { 3609 if (!sub_schurs->gdsw) { 3610 if (eigs[j] == 0.0) { 3611 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n")); 3612 } else { 3613 if (upart) { 3614 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",(double)eigs[j+eigs_start])); 3615 } else { 3616 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",(double)(1./eigs[j+eigs_start]))); 3617 } 3618 } 3619 } else { 3620 double pg = (double)eigs[j+eigs_start]; 3621 if (pg < 2*PETSC_SMALL) pg = 0.0; 3622 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",pg)); 3623 } 3624 } 3625 } 3626 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3627 } 3628 /* change the basis back to the original one */ 3629 if (sub_schurs->change) { 3630 Mat change,phi,phit; 3631 3632 if (pcbddc->dbg_flag > 2) { 3633 PetscInt ii; 3634 for (ii=0;ii<B_neigs;ii++) { 3635 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n",ii,B_neigs,B_N)); 3636 for (j=0;j<B_N;j++) { 3637 #if defined(PETSC_USE_COMPLEX) 3638 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3639 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3640 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",(double)r,(double)c)); 3641 #else 3642 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",(double)(eigv[(ii+eigs_start)*subset_size+j]))); 3643 #endif 3644 } 3645 } 3646 } 3647 PetscCall(KSPGetOperators(sub_schurs->change[i],&change,NULL)); 3648 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit)); 3649 PetscCall(MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi)); 3650 PetscCall(MatCopy(phi,phit,SAME_NONZERO_PATTERN)); 3651 PetscCall(MatDestroy(&phit)); 3652 PetscCall(MatDestroy(&phi)); 3653 } 3654 maxneigs = PetscMax(B_neigs,maxneigs); 3655 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3656 if (B_neigs) { 3657 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size)); 3658 3659 if (pcbddc->dbg_flag > 1) { 3660 PetscInt ii; 3661 for (ii=0;ii<B_neigs;ii++) { 3662 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n",ii,B_neigs,B_N)); 3663 for (j=0;j<B_N;j++) { 3664 #if defined(PETSC_USE_COMPLEX) 3665 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3666 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3667 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",(double)r,(double)c)); 3668 #else 3669 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",(double)PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]))); 3670 #endif 3671 } 3672 } 3673 } 3674 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size)); 3675 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3676 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3677 cum++; 3678 } 3679 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i],&idxs)); 3680 /* shift for next computation */ 3681 cumarray += subset_size*subset_size; 3682 } 3683 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3684 3685 if (mss) { 3686 if (sub_schurs->gdsw) { 3687 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray)); 3688 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3689 } else { 3690 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3691 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3692 /* destroy matrices (junk) */ 3693 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3694 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3695 } 3696 } 3697 if (allocated_S_St) PetscCall(PetscFree2(S,St)); 3698 PetscCall(PetscFree5(eigv,eigs,work,B_iwork,B_ifail)); 3699 #if defined(PETSC_USE_COMPLEX) 3700 PetscCall(PetscFree(rwork)); 3701 #endif 3702 if (pcbddc->dbg_flag) { 3703 PetscInt maxneigs_r; 3704 PetscCall(MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc))); 3705 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %" PetscInt_FMT "\n",maxneigs_r)); 3706 } 3707 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3708 PetscFunctionReturn(0); 3709 } 3710 3711 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3712 { 3713 PetscScalar *coarse_submat_vals; 3714 3715 PetscFunctionBegin; 3716 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3717 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3718 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3719 3720 /* Setup local neumann solver ksp_R */ 3721 /* PCBDDCSetUpLocalScatters should be called first! */ 3722 PetscCall(PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE)); 3723 3724 /* 3725 Setup local correction and local part of coarse basis. 3726 Gives back the dense local part of the coarse matrix in column major ordering 3727 */ 3728 PetscCall(PCBDDCSetUpCorrection(pc,&coarse_submat_vals)); 3729 3730 /* Compute total number of coarse nodes and setup coarse solver */ 3731 PetscCall(PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals)); 3732 3733 /* free */ 3734 PetscCall(PetscFree(coarse_submat_vals)); 3735 PetscFunctionReturn(0); 3736 } 3737 3738 PetscErrorCode PCBDDCResetCustomization(PC pc) 3739 { 3740 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3741 3742 PetscFunctionBegin; 3743 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3744 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3745 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3746 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3747 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3748 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3749 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3750 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3751 PetscCall(PCBDDCSetDofsSplitting(pc,0,NULL)); 3752 PetscCall(PCBDDCSetDofsSplittingLocal(pc,0,NULL)); 3753 PetscFunctionReturn(0); 3754 } 3755 3756 PetscErrorCode PCBDDCResetTopography(PC pc) 3757 { 3758 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3759 PetscInt i; 3760 3761 PetscFunctionBegin; 3762 PetscCall(MatDestroy(&pcbddc->nedcG)); 3763 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3764 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3765 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3766 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3767 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3768 PetscCall(VecDestroy(&pcbddc->work_change)); 3769 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3770 PetscCall(MatDestroy(&pcbddc->divudotp)); 3771 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3772 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3773 for (i=0;i<pcbddc->n_local_subs;i++) { 3774 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3775 } 3776 pcbddc->n_local_subs = 0; 3777 PetscCall(PetscFree(pcbddc->local_subs)); 3778 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3779 pcbddc->graphanalyzed = PETSC_FALSE; 3780 pcbddc->recompute_topography = PETSC_TRUE; 3781 pcbddc->corner_selected = PETSC_FALSE; 3782 PetscFunctionReturn(0); 3783 } 3784 3785 PetscErrorCode PCBDDCResetSolvers(PC pc) 3786 { 3787 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3788 3789 PetscFunctionBegin; 3790 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3791 if (pcbddc->coarse_phi_B) { 3792 PetscScalar *array; 3793 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&array)); 3794 PetscCall(PetscFree(array)); 3795 } 3796 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3797 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3798 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3799 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3800 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3801 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3802 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3803 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3804 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3805 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3806 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3807 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3808 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3809 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3810 PetscCall(KSPReset(pcbddc->ksp_D)); 3811 PetscCall(KSPReset(pcbddc->ksp_R)); 3812 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3813 PetscCall(MatDestroy(&pcbddc->local_mat)); 3814 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3815 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 3816 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3817 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3818 PetscCall(MatDestroy(&pcbddc->benign_change)); 3819 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3820 PetscCall(PCBDDCBenignShellMat(pc,PETSC_TRUE)); 3821 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3822 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3823 if (pcbddc->benign_zerodiag_subs) { 3824 PetscInt i; 3825 for (i=0;i<pcbddc->benign_n;i++) { 3826 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3827 } 3828 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3829 } 3830 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 3831 PetscFunctionReturn(0); 3832 } 3833 3834 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3835 { 3836 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3837 PC_IS *pcis = (PC_IS*)pc->data; 3838 VecType impVecType; 3839 PetscInt n_constraints,n_R,old_size; 3840 3841 PetscFunctionBegin; 3842 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3843 n_R = pcis->n - pcbddc->n_vertices; 3844 PetscCall(VecGetType(pcis->vec1_N,&impVecType)); 3845 /* local work vectors (try to avoid unneeded work)*/ 3846 /* R nodes */ 3847 old_size = -1; 3848 if (pcbddc->vec1_R) { 3849 PetscCall(VecGetSize(pcbddc->vec1_R,&old_size)); 3850 } 3851 if (n_R != old_size) { 3852 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3853 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3854 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R)); 3855 PetscCall(VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R)); 3856 PetscCall(VecSetType(pcbddc->vec1_R,impVecType)); 3857 PetscCall(VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R)); 3858 } 3859 /* local primal dofs */ 3860 old_size = -1; 3861 if (pcbddc->vec1_P) { 3862 PetscCall(VecGetSize(pcbddc->vec1_P,&old_size)); 3863 } 3864 if (pcbddc->local_primal_size != old_size) { 3865 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3866 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P)); 3867 PetscCall(VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size)); 3868 PetscCall(VecSetType(pcbddc->vec1_P,impVecType)); 3869 } 3870 /* local explicit constraints */ 3871 old_size = -1; 3872 if (pcbddc->vec1_C) { 3873 PetscCall(VecGetSize(pcbddc->vec1_C,&old_size)); 3874 } 3875 if (n_constraints && n_constraints != old_size) { 3876 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3877 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C)); 3878 PetscCall(VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints)); 3879 PetscCall(VecSetType(pcbddc->vec1_C,impVecType)); 3880 } 3881 PetscFunctionReturn(0); 3882 } 3883 3884 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3885 { 3886 /* pointers to pcis and pcbddc */ 3887 PC_IS* pcis = (PC_IS*)pc->data; 3888 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3889 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3890 /* submatrices of local problem */ 3891 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3892 /* submatrices of local coarse problem */ 3893 Mat S_VV,S_CV,S_VC,S_CC; 3894 /* working matrices */ 3895 Mat C_CR; 3896 /* additional working stuff */ 3897 PC pc_R; 3898 Mat F,Brhs = NULL; 3899 Vec dummy_vec; 3900 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3901 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3902 PetscScalar *work; 3903 PetscInt *idx_V_B; 3904 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3905 PetscInt i,n_R,n_D,n_B; 3906 PetscScalar one=1.0,m_one=-1.0; 3907 3908 PetscFunctionBegin; 3909 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3910 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 3911 3912 /* Set Non-overlapping dimensions */ 3913 n_vertices = pcbddc->n_vertices; 3914 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3915 n_B = pcis->n_B; 3916 n_D = pcis->n - n_B; 3917 n_R = pcis->n - n_vertices; 3918 3919 /* vertices in boundary numbering */ 3920 PetscCall(PetscMalloc1(n_vertices,&idx_V_B)); 3921 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B)); 3922 PetscCheck(i == n_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT,n_vertices,i); 3923 3924 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3925 PetscCall(PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals)); 3926 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV)); 3927 PetscCall(MatDenseSetLDA(S_VV,pcbddc->local_primal_size)); 3928 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV)); 3929 PetscCall(MatDenseSetLDA(S_CV,pcbddc->local_primal_size)); 3930 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC)); 3931 PetscCall(MatDenseSetLDA(S_VC,pcbddc->local_primal_size)); 3932 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC)); 3933 PetscCall(MatDenseSetLDA(S_CC,pcbddc->local_primal_size)); 3934 3935 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3936 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_R)); 3937 PetscCall(PCSetUp(pc_R)); 3938 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU)); 3939 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL)); 3940 lda_rhs = n_R; 3941 need_benign_correction = PETSC_FALSE; 3942 if (isLU || isCHOL) { 3943 PetscCall(PCFactorGetMatrix(pc_R,&F)); 3944 } else if (sub_schurs && sub_schurs->reuse_solver) { 3945 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3946 MatFactorType type; 3947 3948 F = reuse_solver->F; 3949 PetscCall(MatGetFactorType(F,&type)); 3950 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3951 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3952 PetscCall(MatGetSize(F,&lda_rhs,NULL)); 3953 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3954 } else F = NULL; 3955 3956 /* determine if we can use a sparse right-hand side */ 3957 sparserhs = PETSC_FALSE; 3958 if (F) { 3959 MatSolverType solver; 3960 3961 PetscCall(MatFactorGetSolverType(F,&solver)); 3962 PetscCall(PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs)); 3963 } 3964 3965 /* allocate workspace */ 3966 n = 0; 3967 if (n_constraints) { 3968 n += lda_rhs*n_constraints; 3969 } 3970 if (n_vertices) { 3971 n = PetscMax(2*lda_rhs*n_vertices,n); 3972 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3973 } 3974 if (!pcbddc->symmetric_primal) { 3975 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3976 } 3977 PetscCall(PetscMalloc1(n,&work)); 3978 3979 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3980 dummy_vec = NULL; 3981 if (need_benign_correction && lda_rhs != n_R && F) { 3982 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec)); 3983 PetscCall(VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE)); 3984 PetscCall(VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name)); 3985 } 3986 3987 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3988 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3989 3990 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3991 if (n_constraints) { 3992 Mat M3,C_B; 3993 IS is_aux; 3994 3995 /* Extract constraints on R nodes: C_{CR} */ 3996 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux)); 3997 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR)); 3998 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 3999 4000 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4001 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4002 if (!sparserhs) { 4003 PetscCall(PetscArrayzero(work,lda_rhs*n_constraints)); 4004 for (i=0;i<n_constraints;i++) { 4005 const PetscScalar *row_cmat_values; 4006 const PetscInt *row_cmat_indices; 4007 PetscInt size_of_constraint,j; 4008 4009 PetscCall(MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 4010 for (j=0;j<size_of_constraint;j++) { 4011 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4012 } 4013 PetscCall(MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 4014 } 4015 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs)); 4016 } else { 4017 Mat tC_CR; 4018 4019 PetscCall(MatScale(C_CR,-1.0)); 4020 if (lda_rhs != n_R) { 4021 PetscScalar *aa; 4022 PetscInt r,*ii,*jj; 4023 PetscBool done; 4024 4025 PetscCall(MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4026 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4027 PetscCall(MatSeqAIJGetArray(C_CR,&aa)); 4028 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR)); 4029 PetscCall(MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4030 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4031 } else { 4032 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4033 tC_CR = C_CR; 4034 } 4035 PetscCall(MatCreateTranspose(tC_CR,&Brhs)); 4036 PetscCall(MatDestroy(&tC_CR)); 4037 } 4038 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R)); 4039 if (F) { 4040 if (need_benign_correction) { 4041 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4042 4043 /* rhs is already zero on interior dofs, no need to change the rhs */ 4044 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n)); 4045 } 4046 PetscCall(MatMatSolve(F,Brhs,local_auxmat2_R)); 4047 if (need_benign_correction) { 4048 PetscScalar *marr; 4049 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4050 4051 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4052 if (lda_rhs != n_R) { 4053 for (i=0;i<n_constraints;i++) { 4054 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4055 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4056 PetscCall(VecResetArray(dummy_vec)); 4057 } 4058 } else { 4059 for (i=0;i<n_constraints;i++) { 4060 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4061 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4062 PetscCall(VecResetArray(pcbddc->vec1_R)); 4063 } 4064 } 4065 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4066 } 4067 } else { 4068 PetscScalar *marr; 4069 4070 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4071 for (i=0;i<n_constraints;i++) { 4072 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4073 PetscCall(VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs)); 4074 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4075 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4076 PetscCall(VecResetArray(pcbddc->vec1_R)); 4077 PetscCall(VecResetArray(pcbddc->vec2_R)); 4078 } 4079 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4080 } 4081 if (sparserhs) PetscCall(MatScale(C_CR,-1.0)); 4082 PetscCall(MatDestroy(&Brhs)); 4083 if (!pcbddc->switch_static) { 4084 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2)); 4085 for (i=0;i<n_constraints;i++) { 4086 Vec r, b; 4087 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R,i,&r)); 4088 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2,i,&b)); 4089 PetscCall(VecScatterBegin(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4090 PetscCall(VecScatterEnd(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4091 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2,i,&b)); 4092 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R,i,&r)); 4093 } 4094 PetscCall(MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4095 } else { 4096 if (lda_rhs != n_R) { 4097 IS dummy; 4098 4099 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy)); 4100 PetscCall(MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2)); 4101 PetscCall(ISDestroy(&dummy)); 4102 } else { 4103 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4104 pcbddc->local_auxmat2 = local_auxmat2_R; 4105 } 4106 PetscCall(MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4107 } 4108 PetscCall(ISDestroy(&is_aux)); 4109 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4110 PetscCall(MatScale(M3,m_one)); 4111 if (isCHOL) { 4112 PetscCall(MatCholeskyFactor(M3,NULL,NULL)); 4113 } else { 4114 PetscCall(MatLUFactor(M3,NULL,NULL,NULL)); 4115 } 4116 PetscCall(MatSeqDenseInvertFactors_Private(M3)); 4117 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4118 PetscCall(MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1)); 4119 PetscCall(MatDestroy(&C_B)); 4120 PetscCall(MatCopy(M3,S_CC,SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4121 PetscCall(MatDestroy(&M3)); 4122 } 4123 4124 /* Get submatrices from subdomain matrix */ 4125 if (n_vertices) { 4126 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4127 PetscBool oldpin; 4128 #endif 4129 PetscBool isaij; 4130 IS is_aux; 4131 4132 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4133 IS tis; 4134 4135 PetscCall(ISDuplicate(pcbddc->is_R_local,&tis)); 4136 PetscCall(ISSort(tis)); 4137 PetscCall(ISComplement(tis,0,pcis->n,&is_aux)); 4138 PetscCall(ISDestroy(&tis)); 4139 } else { 4140 PetscCall(ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux)); 4141 } 4142 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4143 oldpin = pcbddc->local_mat->boundtocpu; 4144 #endif 4145 PetscCall(MatBindToCPU(pcbddc->local_mat,PETSC_TRUE)); 4146 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV)); 4147 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR)); 4148 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij)); 4149 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4150 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4151 } 4152 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV)); 4153 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4154 PetscCall(MatBindToCPU(pcbddc->local_mat,oldpin)); 4155 #endif 4156 PetscCall(ISDestroy(&is_aux)); 4157 } 4158 4159 /* Matrix of coarse basis functions (local) */ 4160 if (pcbddc->coarse_phi_B) { 4161 PetscInt on_B,on_primal,on_D=n_D; 4162 if (pcbddc->coarse_phi_D) { 4163 PetscCall(MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL)); 4164 } 4165 PetscCall(MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal)); 4166 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4167 PetscScalar *marray; 4168 4169 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&marray)); 4170 PetscCall(PetscFree(marray)); 4171 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4172 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4173 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4174 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4175 } 4176 } 4177 4178 if (!pcbddc->coarse_phi_B) { 4179 PetscScalar *marr; 4180 4181 /* memory size */ 4182 n = n_B*pcbddc->local_primal_size; 4183 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4184 if (!pcbddc->symmetric_primal) n *= 2; 4185 PetscCall(PetscCalloc1(n,&marr)); 4186 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B)); 4187 marr += n_B*pcbddc->local_primal_size; 4188 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4189 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D)); 4190 marr += n_D*pcbddc->local_primal_size; 4191 } 4192 if (!pcbddc->symmetric_primal) { 4193 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B)); 4194 marr += n_B*pcbddc->local_primal_size; 4195 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4196 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D)); 4197 } 4198 } else { 4199 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4200 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4201 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4202 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4203 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4204 } 4205 } 4206 } 4207 4208 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4209 p0_lidx_I = NULL; 4210 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4211 const PetscInt *idxs; 4212 4213 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 4214 PetscCall(PetscMalloc1(pcbddc->benign_n,&p0_lidx_I)); 4215 for (i=0;i<pcbddc->benign_n;i++) { 4216 PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i])); 4217 } 4218 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 4219 } 4220 4221 /* vertices */ 4222 if (n_vertices) { 4223 PetscBool restoreavr = PETSC_FALSE; 4224 4225 PetscCall(MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV)); 4226 4227 if (n_R) { 4228 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4229 PetscBLASInt B_N,B_one = 1; 4230 const PetscScalar *x; 4231 PetscScalar *y; 4232 4233 PetscCall(MatScale(A_RV,m_one)); 4234 if (need_benign_correction) { 4235 ISLocalToGlobalMapping RtoN; 4236 IS is_p0; 4237 PetscInt *idxs_p0,n; 4238 4239 PetscCall(PetscMalloc1(pcbddc->benign_n,&idxs_p0)); 4240 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN)); 4241 PetscCall(ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0)); 4242 PetscCheck(n == pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT,n,pcbddc->benign_n); 4243 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4244 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0)); 4245 PetscCall(MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr)); 4246 PetscCall(ISDestroy(&is_p0)); 4247 } 4248 4249 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV)); 4250 if (!sparserhs || need_benign_correction) { 4251 if (lda_rhs == n_R) { 4252 PetscCall(MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV)); 4253 } else { 4254 PetscScalar *av,*array; 4255 const PetscInt *xadj,*adjncy; 4256 PetscInt n; 4257 PetscBool flg_row; 4258 4259 array = work+lda_rhs*n_vertices; 4260 PetscCall(PetscArrayzero(array,lda_rhs*n_vertices)); 4261 PetscCall(MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV)); 4262 PetscCall(MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4263 PetscCall(MatSeqAIJGetArray(A_RV,&av)); 4264 for (i=0;i<n;i++) { 4265 PetscInt j; 4266 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4267 } 4268 PetscCall(MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4269 PetscCall(MatDestroy(&A_RV)); 4270 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV)); 4271 } 4272 if (need_benign_correction) { 4273 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4274 PetscScalar *marr; 4275 4276 PetscCall(MatDenseGetArray(A_RV,&marr)); 4277 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4278 4279 | 0 0 0 | (V) 4280 L = | 0 0 -1 | (P-p0) 4281 | 0 0 -1 | (p0) 4282 4283 */ 4284 for (i=0;i<reuse_solver->benign_n;i++) { 4285 const PetscScalar *vals; 4286 const PetscInt *idxs,*idxs_zero; 4287 PetscInt n,j,nz; 4288 4289 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4290 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4291 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4292 for (j=0;j<n;j++) { 4293 PetscScalar val = vals[j]; 4294 PetscInt k,col = idxs[j]; 4295 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4296 } 4297 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4298 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4299 } 4300 PetscCall(MatDenseRestoreArray(A_RV,&marr)); 4301 } 4302 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4303 Brhs = A_RV; 4304 } else { 4305 Mat tA_RVT,A_RVT; 4306 4307 if (!pcbddc->symmetric_primal) { 4308 /* A_RV already scaled by -1 */ 4309 PetscCall(MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT)); 4310 } else { 4311 restoreavr = PETSC_TRUE; 4312 PetscCall(MatScale(A_VR,-1.0)); 4313 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4314 A_RVT = A_VR; 4315 } 4316 if (lda_rhs != n_R) { 4317 PetscScalar *aa; 4318 PetscInt r,*ii,*jj; 4319 PetscBool done; 4320 4321 PetscCall(MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4322 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4323 PetscCall(MatSeqAIJGetArray(A_RVT,&aa)); 4324 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT)); 4325 PetscCall(MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4326 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4327 } else { 4328 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4329 tA_RVT = A_RVT; 4330 } 4331 PetscCall(MatCreateTranspose(tA_RVT,&Brhs)); 4332 PetscCall(MatDestroy(&tA_RVT)); 4333 PetscCall(MatDestroy(&A_RVT)); 4334 } 4335 if (F) { 4336 /* need to correct the rhs */ 4337 if (need_benign_correction) { 4338 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4339 PetscScalar *marr; 4340 4341 PetscCall(MatDenseGetArray(Brhs,&marr)); 4342 if (lda_rhs != n_R) { 4343 for (i=0;i<n_vertices;i++) { 4344 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4345 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE)); 4346 PetscCall(VecResetArray(dummy_vec)); 4347 } 4348 } else { 4349 for (i=0;i<n_vertices;i++) { 4350 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4351 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE)); 4352 PetscCall(VecResetArray(pcbddc->vec1_R)); 4353 } 4354 } 4355 PetscCall(MatDenseRestoreArray(Brhs,&marr)); 4356 } 4357 PetscCall(MatMatSolve(F,Brhs,A_RRmA_RV)); 4358 if (restoreavr) PetscCall(MatScale(A_VR,-1.0)); 4359 /* need to correct the solution */ 4360 if (need_benign_correction) { 4361 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4362 PetscScalar *marr; 4363 4364 PetscCall(MatDenseGetArray(A_RRmA_RV,&marr)); 4365 if (lda_rhs != n_R) { 4366 for (i=0;i<n_vertices;i++) { 4367 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4368 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4369 PetscCall(VecResetArray(dummy_vec)); 4370 } 4371 } else { 4372 for (i=0;i<n_vertices;i++) { 4373 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4374 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4375 PetscCall(VecResetArray(pcbddc->vec1_R)); 4376 } 4377 } 4378 PetscCall(MatDenseRestoreArray(A_RRmA_RV,&marr)); 4379 } 4380 } else { 4381 PetscCall(MatDenseGetArray(Brhs,&y)); 4382 for (i=0;i<n_vertices;i++) { 4383 PetscCall(VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs)); 4384 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs)); 4385 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4386 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4387 PetscCall(VecResetArray(pcbddc->vec1_R)); 4388 PetscCall(VecResetArray(pcbddc->vec2_R)); 4389 } 4390 PetscCall(MatDenseRestoreArray(Brhs,&y)); 4391 } 4392 PetscCall(MatDestroy(&A_RV)); 4393 PetscCall(MatDestroy(&Brhs)); 4394 /* S_VV and S_CV */ 4395 if (n_constraints) { 4396 Mat B; 4397 4398 PetscCall(PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices)); 4399 for (i=0;i<n_vertices;i++) { 4400 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4401 PetscCall(VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B)); 4402 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4403 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4404 PetscCall(VecResetArray(pcis->vec1_B)); 4405 PetscCall(VecResetArray(pcbddc->vec1_R)); 4406 } 4407 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B)); 4408 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4409 PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV)); 4410 PetscCall(MatProductSetType(S_CV,MATPRODUCT_AB)); 4411 PetscCall(MatProductSetFromOptions(S_CV)); 4412 PetscCall(MatProductSymbolic(S_CV)); 4413 PetscCall(MatProductNumeric(S_CV)); 4414 PetscCall(MatProductClear(S_CV)); 4415 4416 PetscCall(MatDestroy(&B)); 4417 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B)); 4418 /* Reuse B = local_auxmat2_R * S_CV */ 4419 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B)); 4420 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4421 PetscCall(MatProductSetFromOptions(B)); 4422 PetscCall(MatProductSymbolic(B)); 4423 PetscCall(MatProductNumeric(B)); 4424 4425 PetscCall(MatScale(S_CV,m_one)); 4426 PetscCall(PetscBLASIntCast(lda_rhs*n_vertices,&B_N)); 4427 PetscCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4428 PetscCall(MatDestroy(&B)); 4429 } 4430 if (lda_rhs != n_R) { 4431 PetscCall(MatDestroy(&A_RRmA_RV)); 4432 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV)); 4433 PetscCall(MatDenseSetLDA(A_RRmA_RV,lda_rhs)); 4434 } 4435 PetscCall(MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt)); 4436 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4437 if (need_benign_correction) { 4438 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4439 PetscScalar *marr,*sums; 4440 4441 PetscCall(PetscMalloc1(n_vertices,&sums)); 4442 PetscCall(MatDenseGetArray(S_VVt,&marr)); 4443 for (i=0;i<reuse_solver->benign_n;i++) { 4444 const PetscScalar *vals; 4445 const PetscInt *idxs,*idxs_zero; 4446 PetscInt n,j,nz; 4447 4448 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4449 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4450 for (j=0;j<n_vertices;j++) { 4451 PetscInt k; 4452 sums[j] = 0.; 4453 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4454 } 4455 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4456 for (j=0;j<n;j++) { 4457 PetscScalar val = vals[j]; 4458 PetscInt k; 4459 for (k=0;k<n_vertices;k++) { 4460 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4461 } 4462 } 4463 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4464 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4465 } 4466 PetscCall(PetscFree(sums)); 4467 PetscCall(MatDenseRestoreArray(S_VVt,&marr)); 4468 PetscCall(MatDestroy(&A_RV_bcorr)); 4469 } 4470 PetscCall(MatDestroy(&A_RRmA_RV)); 4471 PetscCall(PetscBLASIntCast(n_vertices*n_vertices,&B_N)); 4472 PetscCall(MatDenseGetArrayRead(A_VV,&x)); 4473 PetscCall(MatDenseGetArray(S_VVt,&y)); 4474 PetscCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4475 PetscCall(MatDenseRestoreArrayRead(A_VV,&x)); 4476 PetscCall(MatDenseRestoreArray(S_VVt,&y)); 4477 PetscCall(MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN)); 4478 PetscCall(MatDestroy(&S_VVt)); 4479 } else { 4480 PetscCall(MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN)); 4481 } 4482 PetscCall(MatDestroy(&A_VV)); 4483 4484 /* coarse basis functions */ 4485 for (i=0;i<n_vertices;i++) { 4486 Vec v; 4487 PetscScalar one = 1.0,zero = 0.0; 4488 4489 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4490 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i,&v)); 4491 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4492 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4493 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4494 PetscMPIInt rank; 4495 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),&rank)); 4496 PetscCheck(rank <= 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4497 } 4498 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4499 PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4500 PetscCall(VecAssemblyEnd(v)); 4501 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i,&v)); 4502 4503 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4504 PetscInt j; 4505 4506 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i,&v)); 4507 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4508 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4509 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4510 PetscMPIInt rank; 4511 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),&rank)); 4512 PetscCheck(rank <= 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4513 } 4514 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4515 PetscCall(VecAssemblyBegin(v)); 4516 PetscCall(VecAssemblyEnd(v)); 4517 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i,&v)); 4518 } 4519 PetscCall(VecResetArray(pcbddc->vec1_R)); 4520 } 4521 /* if n_R == 0 the object is not destroyed */ 4522 PetscCall(MatDestroy(&A_RV)); 4523 } 4524 PetscCall(VecDestroy(&dummy_vec)); 4525 4526 if (n_constraints) { 4527 Mat B; 4528 4529 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B)); 4530 PetscCall(MatScale(S_CC,m_one)); 4531 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B)); 4532 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4533 PetscCall(MatProductSetFromOptions(B)); 4534 PetscCall(MatProductSymbolic(B)); 4535 PetscCall(MatProductNumeric(B)); 4536 4537 PetscCall(MatScale(S_CC,m_one)); 4538 if (n_vertices) { 4539 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4540 PetscCall(MatTransposeSetPrecursor(S_CV,S_VC)); 4541 PetscCall(MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC)); 4542 } else { 4543 Mat S_VCt; 4544 4545 if (lda_rhs != n_R) { 4546 PetscCall(MatDestroy(&B)); 4547 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B)); 4548 PetscCall(MatDenseSetLDA(B,lda_rhs)); 4549 } 4550 PetscCall(MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt)); 4551 PetscCall(MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN)); 4552 PetscCall(MatDestroy(&S_VCt)); 4553 } 4554 } 4555 PetscCall(MatDestroy(&B)); 4556 /* coarse basis functions */ 4557 for (i=0;i<n_constraints;i++) { 4558 Vec v; 4559 4560 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4561 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4562 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4563 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4564 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4565 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4566 PetscInt j; 4567 PetscScalar zero = 0.0; 4568 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4569 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4570 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4571 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4572 PetscCall(VecAssemblyBegin(v)); 4573 PetscCall(VecAssemblyEnd(v)); 4574 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4575 } 4576 PetscCall(VecResetArray(pcbddc->vec1_R)); 4577 } 4578 } 4579 if (n_constraints) { 4580 PetscCall(MatDestroy(&local_auxmat2_R)); 4581 } 4582 PetscCall(PetscFree(p0_lidx_I)); 4583 4584 /* coarse matrix entries relative to B_0 */ 4585 if (pcbddc->benign_n) { 4586 Mat B0_B,B0_BPHI; 4587 IS is_dummy; 4588 const PetscScalar *data; 4589 PetscInt j; 4590 4591 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4592 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4593 PetscCall(ISDestroy(&is_dummy)); 4594 PetscCall(MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4595 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4596 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data)); 4597 for (j=0;j<pcbddc->benign_n;j++) { 4598 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4599 for (i=0;i<pcbddc->local_primal_size;i++) { 4600 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4601 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4602 } 4603 } 4604 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data)); 4605 PetscCall(MatDestroy(&B0_B)); 4606 PetscCall(MatDestroy(&B0_BPHI)); 4607 } 4608 4609 /* compute other basis functions for non-symmetric problems */ 4610 if (!pcbddc->symmetric_primal) { 4611 Mat B_V=NULL,B_C=NULL; 4612 PetscScalar *marray; 4613 4614 if (n_constraints) { 4615 Mat S_CCT,C_CRT; 4616 4617 PetscCall(MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT)); 4618 PetscCall(MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT)); 4619 PetscCall(MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C)); 4620 PetscCall(MatDestroy(&S_CCT)); 4621 if (n_vertices) { 4622 Mat S_VCT; 4623 4624 PetscCall(MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT)); 4625 PetscCall(MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V)); 4626 PetscCall(MatDestroy(&S_VCT)); 4627 } 4628 PetscCall(MatDestroy(&C_CRT)); 4629 } else { 4630 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V)); 4631 } 4632 if (n_vertices && n_R) { 4633 PetscScalar *av,*marray; 4634 const PetscInt *xadj,*adjncy; 4635 PetscInt n; 4636 PetscBool flg_row; 4637 4638 /* B_V = B_V - A_VR^T */ 4639 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4640 PetscCall(MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4641 PetscCall(MatSeqAIJGetArray(A_VR,&av)); 4642 PetscCall(MatDenseGetArray(B_V,&marray)); 4643 for (i=0;i<n;i++) { 4644 PetscInt j; 4645 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4646 } 4647 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4648 PetscCall(MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4649 PetscCall(MatDestroy(&A_VR)); 4650 } 4651 4652 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4653 if (n_vertices) { 4654 PetscCall(MatDenseGetArray(B_V,&marray)); 4655 for (i=0;i<n_vertices;i++) { 4656 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+i*n_R)); 4657 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4658 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4659 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4660 PetscCall(VecResetArray(pcbddc->vec1_R)); 4661 PetscCall(VecResetArray(pcbddc->vec2_R)); 4662 } 4663 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4664 } 4665 if (B_C) { 4666 PetscCall(MatDenseGetArray(B_C,&marray)); 4667 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4668 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R)); 4669 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4670 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4671 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4672 PetscCall(VecResetArray(pcbddc->vec1_R)); 4673 PetscCall(VecResetArray(pcbddc->vec2_R)); 4674 } 4675 PetscCall(MatDenseRestoreArray(B_C,&marray)); 4676 } 4677 /* coarse basis functions */ 4678 for (i=0;i<pcbddc->local_primal_size;i++) { 4679 Vec v; 4680 4681 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*n_R)); 4682 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B,i,&v)); 4683 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4684 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4685 if (i<n_vertices) { 4686 PetscScalar one = 1.0; 4687 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4688 PetscCall(VecAssemblyBegin(v)); 4689 PetscCall(VecAssemblyEnd(v)); 4690 } 4691 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B,i,&v)); 4692 4693 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4694 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D,i,&v)); 4695 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4696 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4697 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D,i,&v)); 4698 } 4699 PetscCall(VecResetArray(pcbddc->vec1_R)); 4700 } 4701 PetscCall(MatDestroy(&B_V)); 4702 PetscCall(MatDestroy(&B_C)); 4703 } 4704 4705 /* free memory */ 4706 PetscCall(PetscFree(idx_V_B)); 4707 PetscCall(MatDestroy(&S_VV)); 4708 PetscCall(MatDestroy(&S_CV)); 4709 PetscCall(MatDestroy(&S_VC)); 4710 PetscCall(MatDestroy(&S_CC)); 4711 PetscCall(PetscFree(work)); 4712 if (n_vertices) { 4713 PetscCall(MatDestroy(&A_VR)); 4714 } 4715 if (n_constraints) { 4716 PetscCall(MatDestroy(&C_CR)); 4717 } 4718 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 4719 4720 /* Checking coarse_sub_mat and coarse basis functios */ 4721 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4722 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4723 if (pcbddc->dbg_flag) { 4724 Mat coarse_sub_mat; 4725 Mat AUXMAT,TM1,TM2,TM3,TM4; 4726 Mat coarse_phi_D,coarse_phi_B; 4727 Mat coarse_psi_D,coarse_psi_B; 4728 Mat A_II,A_BB,A_IB,A_BI; 4729 Mat C_B,CPHI; 4730 IS is_dummy; 4731 Vec mones; 4732 MatType checkmattype=MATSEQAIJ; 4733 PetscReal real_value; 4734 4735 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4736 Mat A; 4737 PetscCall(PCBDDCBenignProject(pc,NULL,NULL,&A)); 4738 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II)); 4739 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB)); 4740 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI)); 4741 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB)); 4742 PetscCall(MatDestroy(&A)); 4743 } else { 4744 PetscCall(MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II)); 4745 PetscCall(MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB)); 4746 PetscCall(MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI)); 4747 PetscCall(MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB)); 4748 } 4749 PetscCall(MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D)); 4750 PetscCall(MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B)); 4751 if (!pcbddc->symmetric_primal) { 4752 PetscCall(MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D)); 4753 PetscCall(MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B)); 4754 } 4755 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat)); 4756 4757 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 4758 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal)); 4759 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4760 if (!pcbddc->symmetric_primal) { 4761 PetscCall(MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4762 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1)); 4763 PetscCall(MatDestroy(&AUXMAT)); 4764 PetscCall(MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4765 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2)); 4766 PetscCall(MatDestroy(&AUXMAT)); 4767 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4768 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4769 PetscCall(MatDestroy(&AUXMAT)); 4770 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4771 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4772 PetscCall(MatDestroy(&AUXMAT)); 4773 } else { 4774 PetscCall(MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1)); 4775 PetscCall(MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2)); 4776 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4777 PetscCall(MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4778 PetscCall(MatDestroy(&AUXMAT)); 4779 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4780 PetscCall(MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4781 PetscCall(MatDestroy(&AUXMAT)); 4782 } 4783 PetscCall(MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN)); 4784 PetscCall(MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN)); 4785 PetscCall(MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN)); 4786 PetscCall(MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1)); 4787 if (pcbddc->benign_n) { 4788 Mat B0_B,B0_BPHI; 4789 const PetscScalar *data2; 4790 PetscScalar *data; 4791 PetscInt j; 4792 4793 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4794 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4795 PetscCall(MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4796 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4797 PetscCall(MatDenseGetArray(TM1,&data)); 4798 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data2)); 4799 for (j=0;j<pcbddc->benign_n;j++) { 4800 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4801 for (i=0;i<pcbddc->local_primal_size;i++) { 4802 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4803 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4804 } 4805 } 4806 PetscCall(MatDenseRestoreArray(TM1,&data)); 4807 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data2)); 4808 PetscCall(MatDestroy(&B0_B)); 4809 PetscCall(ISDestroy(&is_dummy)); 4810 PetscCall(MatDestroy(&B0_BPHI)); 4811 } 4812 #if 0 4813 { 4814 PetscViewer viewer; 4815 char filename[256]; 4816 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4817 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4818 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4819 PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4820 PetscCall(MatView(coarse_sub_mat,viewer)); 4821 PetscCall(PetscObjectSetName((PetscObject)TM1,"projected")); 4822 PetscCall(MatView(TM1,viewer)); 4823 if (pcbddc->coarse_phi_B) { 4824 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4825 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 4826 } 4827 if (pcbddc->coarse_phi_D) { 4828 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4829 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 4830 } 4831 if (pcbddc->coarse_psi_B) { 4832 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4833 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 4834 } 4835 if (pcbddc->coarse_psi_D) { 4836 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4837 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 4838 } 4839 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4840 PetscCall(MatView(pcbddc->local_mat,viewer)); 4841 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4842 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 4843 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4844 PetscCall(ISView(pcis->is_I_local,viewer)); 4845 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4846 PetscCall(ISView(pcis->is_B_local,viewer)); 4847 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4848 PetscCall(ISView(pcbddc->is_R_local,viewer)); 4849 PetscCall(PetscViewerDestroy(&viewer)); 4850 } 4851 #endif 4852 PetscCall(MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN)); 4853 PetscCall(MatNorm(TM1,NORM_FROBENIUS,&real_value)); 4854 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4855 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4856 4857 /* check constraints */ 4858 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy)); 4859 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 4860 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4861 PetscCall(MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4862 } else { 4863 PetscScalar *data; 4864 Mat tmat; 4865 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&data)); 4866 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat)); 4867 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B,&data)); 4868 PetscCall(MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4869 PetscCall(MatDestroy(&tmat)); 4870 } 4871 PetscCall(MatCreateVecs(CPHI,&mones,NULL)); 4872 PetscCall(VecSet(mones,-1.0)); 4873 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4874 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4875 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4876 if (!pcbddc->symmetric_primal) { 4877 PetscCall(MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI)); 4878 PetscCall(VecSet(mones,-1.0)); 4879 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4880 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4881 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4882 } 4883 PetscCall(MatDestroy(&C_B)); 4884 PetscCall(MatDestroy(&CPHI)); 4885 PetscCall(ISDestroy(&is_dummy)); 4886 PetscCall(VecDestroy(&mones)); 4887 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4888 PetscCall(MatDestroy(&A_II)); 4889 PetscCall(MatDestroy(&A_BB)); 4890 PetscCall(MatDestroy(&A_IB)); 4891 PetscCall(MatDestroy(&A_BI)); 4892 PetscCall(MatDestroy(&TM1)); 4893 PetscCall(MatDestroy(&TM2)); 4894 PetscCall(MatDestroy(&TM3)); 4895 PetscCall(MatDestroy(&TM4)); 4896 PetscCall(MatDestroy(&coarse_phi_D)); 4897 PetscCall(MatDestroy(&coarse_phi_B)); 4898 if (!pcbddc->symmetric_primal) { 4899 PetscCall(MatDestroy(&coarse_psi_D)); 4900 PetscCall(MatDestroy(&coarse_psi_B)); 4901 } 4902 PetscCall(MatDestroy(&coarse_sub_mat)); 4903 } 4904 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4905 { 4906 PetscBool gpu; 4907 4908 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu)); 4909 if (gpu) { 4910 if (pcbddc->local_auxmat1) { 4911 PetscCall(MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1)); 4912 } 4913 if (pcbddc->local_auxmat2) { 4914 PetscCall(MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2)); 4915 } 4916 if (pcbddc->coarse_phi_B) { 4917 PetscCall(MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B)); 4918 } 4919 if (pcbddc->coarse_phi_D) { 4920 PetscCall(MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D)); 4921 } 4922 if (pcbddc->coarse_psi_B) { 4923 PetscCall(MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B)); 4924 } 4925 if (pcbddc->coarse_psi_D) { 4926 PetscCall(MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D)); 4927 } 4928 } 4929 } 4930 /* get back data */ 4931 *coarse_submat_vals_n = coarse_submat_vals; 4932 PetscFunctionReturn(0); 4933 } 4934 4935 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4936 { 4937 Mat *work_mat; 4938 IS isrow_s,iscol_s; 4939 PetscBool rsorted,csorted; 4940 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4941 4942 PetscFunctionBegin; 4943 PetscCall(ISSorted(isrow,&rsorted)); 4944 PetscCall(ISSorted(iscol,&csorted)); 4945 PetscCall(ISGetLocalSize(isrow,&rsize)); 4946 PetscCall(ISGetLocalSize(iscol,&csize)); 4947 4948 if (!rsorted) { 4949 const PetscInt *idxs; 4950 PetscInt *idxs_sorted,i; 4951 4952 PetscCall(PetscMalloc1(rsize,&idxs_perm_r)); 4953 PetscCall(PetscMalloc1(rsize,&idxs_sorted)); 4954 for (i=0;i<rsize;i++) { 4955 idxs_perm_r[i] = i; 4956 } 4957 PetscCall(ISGetIndices(isrow,&idxs)); 4958 PetscCall(PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r)); 4959 for (i=0;i<rsize;i++) { 4960 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4961 } 4962 PetscCall(ISRestoreIndices(isrow,&idxs)); 4963 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s)); 4964 } else { 4965 PetscCall(PetscObjectReference((PetscObject)isrow)); 4966 isrow_s = isrow; 4967 } 4968 4969 if (!csorted) { 4970 if (isrow == iscol) { 4971 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 4972 iscol_s = isrow_s; 4973 } else { 4974 const PetscInt *idxs; 4975 PetscInt *idxs_sorted,i; 4976 4977 PetscCall(PetscMalloc1(csize,&idxs_perm_c)); 4978 PetscCall(PetscMalloc1(csize,&idxs_sorted)); 4979 for (i=0;i<csize;i++) { 4980 idxs_perm_c[i] = i; 4981 } 4982 PetscCall(ISGetIndices(iscol,&idxs)); 4983 PetscCall(PetscSortIntWithPermutation(csize,idxs,idxs_perm_c)); 4984 for (i=0;i<csize;i++) { 4985 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4986 } 4987 PetscCall(ISRestoreIndices(iscol,&idxs)); 4988 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s)); 4989 } 4990 } else { 4991 PetscCall(PetscObjectReference((PetscObject)iscol)); 4992 iscol_s = iscol; 4993 } 4994 4995 PetscCall(MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat)); 4996 4997 if (!rsorted || !csorted) { 4998 Mat new_mat; 4999 IS is_perm_r,is_perm_c; 5000 5001 if (!rsorted) { 5002 PetscInt *idxs_r,i; 5003 PetscCall(PetscMalloc1(rsize,&idxs_r)); 5004 for (i=0;i<rsize;i++) { 5005 idxs_r[idxs_perm_r[i]] = i; 5006 } 5007 PetscCall(PetscFree(idxs_perm_r)); 5008 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r)); 5009 } else { 5010 PetscCall(ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r)); 5011 } 5012 PetscCall(ISSetPermutation(is_perm_r)); 5013 5014 if (!csorted) { 5015 if (isrow_s == iscol_s) { 5016 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5017 is_perm_c = is_perm_r; 5018 } else { 5019 PetscInt *idxs_c,i; 5020 PetscCheck(idxs_perm_c,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5021 PetscCall(PetscMalloc1(csize,&idxs_c)); 5022 for (i=0;i<csize;i++) { 5023 idxs_c[idxs_perm_c[i]] = i; 5024 } 5025 PetscCall(PetscFree(idxs_perm_c)); 5026 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c)); 5027 } 5028 } else { 5029 PetscCall(ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c)); 5030 } 5031 PetscCall(ISSetPermutation(is_perm_c)); 5032 5033 PetscCall(MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat)); 5034 PetscCall(MatDestroy(&work_mat[0])); 5035 work_mat[0] = new_mat; 5036 PetscCall(ISDestroy(&is_perm_r)); 5037 PetscCall(ISDestroy(&is_perm_c)); 5038 } 5039 5040 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5041 *B = work_mat[0]; 5042 PetscCall(MatDestroyMatrices(1,&work_mat)); 5043 PetscCall(ISDestroy(&isrow_s)); 5044 PetscCall(ISDestroy(&iscol_s)); 5045 PetscFunctionReturn(0); 5046 } 5047 5048 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5049 { 5050 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5051 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5052 Mat new_mat,lA; 5053 IS is_local,is_global; 5054 PetscInt local_size; 5055 PetscBool isseqaij,issym,isset; 5056 5057 PetscFunctionBegin; 5058 PetscCall(MatDestroy(&pcbddc->local_mat)); 5059 PetscCall(MatGetSize(matis->A,&local_size,NULL)); 5060 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local)); 5061 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping,is_local,&is_global)); 5062 PetscCall(ISDestroy(&is_local)); 5063 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat)); 5064 PetscCall(ISDestroy(&is_global)); 5065 5066 if (pcbddc->dbg_flag) { 5067 Vec x,x_change; 5068 PetscReal error; 5069 5070 PetscCall(MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change)); 5071 PetscCall(VecSetRandom(x,NULL)); 5072 PetscCall(MatMult(ChangeOfBasisMatrix,x,x_change)); 5073 PetscCall(VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5074 PetscCall(VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5075 PetscCall(MatMult(new_mat,matis->x,matis->y)); 5076 if (!pcbddc->change_interior) { 5077 const PetscScalar *x,*y,*v; 5078 PetscReal lerror = 0.; 5079 PetscInt i; 5080 5081 PetscCall(VecGetArrayRead(matis->x,&x)); 5082 PetscCall(VecGetArrayRead(matis->y,&y)); 5083 PetscCall(VecGetArrayRead(matis->counter,&v)); 5084 for (i=0;i<local_size;i++) 5085 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5086 lerror = PetscAbsScalar(x[i]-y[i]); 5087 PetscCall(VecRestoreArrayRead(matis->x,&x)); 5088 PetscCall(VecRestoreArrayRead(matis->y,&y)); 5089 PetscCall(VecRestoreArrayRead(matis->counter,&v)); 5090 PetscCall(MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)pc))); 5091 if (error > PETSC_SMALL) { 5092 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5093 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",(double)error); 5094 } else { 5095 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",(double)error); 5096 } 5097 } 5098 } 5099 PetscCall(VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5100 PetscCall(VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5101 PetscCall(VecAXPY(x,-1.0,x_change)); 5102 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 5103 if (error > PETSC_SMALL) { 5104 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5105 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",(double)error); 5106 } else { 5107 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",(double)error); 5108 } 5109 } 5110 PetscCall(VecDestroy(&x)); 5111 PetscCall(VecDestroy(&x_change)); 5112 } 5113 5114 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5115 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA)); 5116 5117 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5118 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij)); 5119 if (isseqaij) { 5120 PetscCall(MatDestroy(&pcbddc->local_mat)); 5121 PetscCall(MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5122 if (lA) { 5123 Mat work; 5124 PetscCall(MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5125 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5126 PetscCall(MatDestroy(&work)); 5127 } 5128 } else { 5129 Mat work_mat; 5130 5131 PetscCall(MatDestroy(&pcbddc->local_mat)); 5132 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5133 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5134 PetscCall(MatDestroy(&work_mat)); 5135 if (lA) { 5136 Mat work; 5137 PetscCall(MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5138 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5139 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5140 PetscCall(MatDestroy(&work)); 5141 } 5142 } 5143 PetscCall(MatIsSymmetricKnown(matis->A,&isset,&issym)); 5144 if (isset) PetscCall(MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,issym)); 5145 PetscCall(MatDestroy(&new_mat)); 5146 PetscFunctionReturn(0); 5147 } 5148 5149 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5150 { 5151 PC_IS* pcis = (PC_IS*)(pc->data); 5152 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5153 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5154 PetscInt *idx_R_local=NULL; 5155 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5156 PetscInt vbs,bs; 5157 PetscBT bitmask=NULL; 5158 5159 PetscFunctionBegin; 5160 /* 5161 No need to setup local scatters if 5162 - primal space is unchanged 5163 AND 5164 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5165 AND 5166 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5167 */ 5168 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5169 PetscFunctionReturn(0); 5170 } 5171 /* destroy old objects */ 5172 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5173 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5174 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5175 /* Set Non-overlapping dimensions */ 5176 n_B = pcis->n_B; 5177 n_D = pcis->n - n_B; 5178 n_vertices = pcbddc->n_vertices; 5179 5180 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5181 5182 /* create auxiliary bitmask and allocate workspace */ 5183 if (!sub_schurs || !sub_schurs->reuse_solver) { 5184 PetscCall(PetscMalloc1(pcis->n-n_vertices,&idx_R_local)); 5185 PetscCall(PetscBTCreate(pcis->n,&bitmask)); 5186 for (i=0;i<n_vertices;i++) { 5187 PetscCall(PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i])); 5188 } 5189 5190 for (i=0, n_R=0; i<pcis->n; i++) { 5191 if (!PetscBTLookup(bitmask,i)) { 5192 idx_R_local[n_R++] = i; 5193 } 5194 } 5195 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5196 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5197 5198 PetscCall(ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5199 PetscCall(ISGetLocalSize(reuse_solver->is_R,&n_R)); 5200 } 5201 5202 /* Block code */ 5203 vbs = 1; 5204 PetscCall(MatGetBlockSize(pcbddc->local_mat,&bs)); 5205 if (bs>1 && !(n_vertices%bs)) { 5206 PetscBool is_blocked = PETSC_TRUE; 5207 PetscInt *vary; 5208 if (!sub_schurs || !sub_schurs->reuse_solver) { 5209 PetscCall(PetscMalloc1(pcis->n/bs,&vary)); 5210 PetscCall(PetscArrayzero(vary,pcis->n/bs)); 5211 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5212 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5213 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5214 for (i=0; i<pcis->n/bs; i++) { 5215 if (vary[i]!=0 && vary[i]!=bs) { 5216 is_blocked = PETSC_FALSE; 5217 break; 5218 } 5219 } 5220 PetscCall(PetscFree(vary)); 5221 } else { 5222 /* Verify directly the R set */ 5223 for (i=0; i<n_R/bs; i++) { 5224 PetscInt j,node=idx_R_local[bs*i]; 5225 for (j=1; j<bs; j++) { 5226 if (node != idx_R_local[bs*i+j]-j) { 5227 is_blocked = PETSC_FALSE; 5228 break; 5229 } 5230 } 5231 } 5232 } 5233 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5234 vbs = bs; 5235 for (i=0;i<n_R/vbs;i++) { 5236 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5237 } 5238 } 5239 } 5240 PetscCall(ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local)); 5241 if (sub_schurs && sub_schurs->reuse_solver) { 5242 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5243 5244 PetscCall(ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5245 PetscCall(ISDestroy(&reuse_solver->is_R)); 5246 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5247 reuse_solver->is_R = pcbddc->is_R_local; 5248 } else { 5249 PetscCall(PetscFree(idx_R_local)); 5250 } 5251 5252 /* print some info if requested */ 5253 if (pcbddc->dbg_flag) { 5254 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5255 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5256 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5257 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank)); 5258 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n",pcis->n,n_D,n_B)); 5259 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size)); 5260 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5261 } 5262 5263 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5264 if (!sub_schurs || !sub_schurs->reuse_solver) { 5265 IS is_aux1,is_aux2; 5266 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5267 5268 PetscCall(ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5269 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array1)); 5270 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array2)); 5271 PetscCall(ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5272 for (i=0; i<n_D; i++) { 5273 PetscCall(PetscBTSet(bitmask,is_indices[i])); 5274 } 5275 PetscCall(ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5276 for (i=0, j=0; i<n_R; i++) { 5277 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5278 aux_array1[j++] = i; 5279 } 5280 } 5281 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5282 PetscCall(ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5283 for (i=0, j=0; i<n_B; i++) { 5284 if (!PetscBTLookup(bitmask,is_indices[i])) { 5285 aux_array2[j++] = i; 5286 } 5287 } 5288 PetscCall(ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5289 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2)); 5290 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B)); 5291 PetscCall(ISDestroy(&is_aux1)); 5292 PetscCall(ISDestroy(&is_aux2)); 5293 5294 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5295 PetscCall(PetscMalloc1(n_D,&aux_array1)); 5296 for (i=0, j=0; i<n_R; i++) { 5297 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5298 aux_array1[j++] = i; 5299 } 5300 } 5301 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5302 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5303 PetscCall(ISDestroy(&is_aux1)); 5304 } 5305 PetscCall(PetscBTDestroy(&bitmask)); 5306 PetscCall(ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5307 } else { 5308 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5309 IS tis; 5310 PetscInt schur_size; 5311 5312 PetscCall(ISGetLocalSize(reuse_solver->is_B,&schur_size)); 5313 PetscCall(ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis)); 5314 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B)); 5315 PetscCall(ISDestroy(&tis)); 5316 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5317 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis)); 5318 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5319 PetscCall(ISDestroy(&tis)); 5320 } 5321 } 5322 PetscFunctionReturn(0); 5323 } 5324 5325 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5326 { 5327 MatNullSpace NullSpace; 5328 Mat dmat; 5329 const Vec *nullvecs; 5330 Vec v,v2,*nullvecs2; 5331 VecScatter sct = NULL; 5332 PetscContainer c; 5333 PetscScalar *ddata; 5334 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5335 PetscBool nnsp_has_cnst; 5336 5337 PetscFunctionBegin; 5338 if (!is && !B) { /* MATIS */ 5339 Mat_IS* matis = (Mat_IS*)A->data; 5340 5341 if (!B) { 5342 PetscCall(MatISGetLocalMat(A,&B)); 5343 } 5344 sct = matis->cctx; 5345 PetscCall(PetscObjectReference((PetscObject)sct)); 5346 } else { 5347 PetscCall(MatGetNullSpace(B,&NullSpace)); 5348 if (!NullSpace) { 5349 PetscCall(MatGetNearNullSpace(B,&NullSpace)); 5350 } 5351 if (NullSpace) PetscFunctionReturn(0); 5352 } 5353 PetscCall(MatGetNullSpace(A,&NullSpace)); 5354 if (!NullSpace) { 5355 PetscCall(MatGetNearNullSpace(A,&NullSpace)); 5356 } 5357 if (!NullSpace) PetscFunctionReturn(0); 5358 5359 PetscCall(MatCreateVecs(A,&v,NULL)); 5360 PetscCall(MatCreateVecs(B,&v2,NULL)); 5361 if (!sct) { 5362 PetscCall(VecScatterCreate(v,is,v2,NULL,&sct)); 5363 } 5364 PetscCall(MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs)); 5365 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5366 PetscCall(PetscMalloc1(bsiz,&nullvecs2)); 5367 PetscCall(VecGetBlockSize(v2,&bs)); 5368 PetscCall(VecGetSize(v2,&N)); 5369 PetscCall(VecGetLocalSize(v2,&n)); 5370 PetscCall(PetscMalloc1(n*bsiz,&ddata)); 5371 for (k=0;k<nnsp_size;k++) { 5372 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k])); 5373 PetscCall(VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5374 PetscCall(VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5375 } 5376 if (nnsp_has_cnst) { 5377 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size])); 5378 PetscCall(VecSet(nullvecs2[nnsp_size],1.0)); 5379 } 5380 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2)); 5381 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace)); 5382 5383 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat)); 5384 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B),&c)); 5385 PetscCall(PetscContainerSetPointer(c,ddata)); 5386 PetscCall(PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault)); 5387 PetscCall(PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c)); 5388 PetscCall(PetscContainerDestroy(&c)); 5389 PetscCall(PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat)); 5390 PetscCall(MatDestroy(&dmat)); 5391 5392 for (k=0;k<bsiz;k++) { 5393 PetscCall(VecDestroy(&nullvecs2[k])); 5394 } 5395 PetscCall(PetscFree(nullvecs2)); 5396 PetscCall(MatSetNearNullSpace(B,NullSpace)); 5397 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5398 PetscCall(VecDestroy(&v)); 5399 PetscCall(VecDestroy(&v2)); 5400 PetscCall(VecScatterDestroy(&sct)); 5401 PetscFunctionReturn(0); 5402 } 5403 5404 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5405 { 5406 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5407 PC_IS *pcis = (PC_IS*)pc->data; 5408 PC pc_temp; 5409 Mat A_RR; 5410 MatNullSpace nnsp; 5411 MatReuse reuse; 5412 PetscScalar m_one = -1.0; 5413 PetscReal value; 5414 PetscInt n_D,n_R; 5415 PetscBool issbaij,opts,isset,issym; 5416 void (*f)(void) = NULL; 5417 char dir_prefix[256],neu_prefix[256],str_level[16]; 5418 size_t len; 5419 5420 PetscFunctionBegin; 5421 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5422 /* approximate solver, propagate NearNullSpace if needed */ 5423 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5424 MatNullSpace gnnsp1,gnnsp2; 5425 PetscBool lhas,ghas; 5426 5427 PetscCall(MatGetNearNullSpace(pcbddc->local_mat,&nnsp)); 5428 PetscCall(MatGetNearNullSpace(pc->pmat,&gnnsp1)); 5429 PetscCall(MatGetNullSpace(pc->pmat,&gnnsp2)); 5430 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5431 PetscCall(MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 5432 if (!ghas && (gnnsp1 || gnnsp2)) { 5433 PetscCall(MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL)); 5434 } 5435 } 5436 5437 /* compute prefixes */ 5438 PetscCall(PetscStrcpy(dir_prefix,"")); 5439 PetscCall(PetscStrcpy(neu_prefix,"")); 5440 if (!pcbddc->current_level) { 5441 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix))); 5442 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix))); 5443 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5444 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5445 } else { 5446 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 5447 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 5448 len -= 15; /* remove "pc_bddc_coarse_" */ 5449 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5450 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5451 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5452 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1)); 5453 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1)); 5454 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5455 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5456 PetscCall(PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix))); 5457 PetscCall(PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix))); 5458 } 5459 5460 /* DIRICHLET PROBLEM */ 5461 if (dirichlet) { 5462 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5463 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5464 PetscCheck(sub_schurs && sub_schurs->reuse_solver,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5465 if (pcbddc->dbg_flag) { 5466 Mat A_IIn; 5467 5468 PetscCall(PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn)); 5469 PetscCall(MatDestroy(&pcis->A_II)); 5470 pcis->A_II = A_IIn; 5471 } 5472 } 5473 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat,&isset,&issym)); 5474 if (isset) PetscCall(MatSetOption(pcis->A_II,MAT_SYMMETRIC,issym)); 5475 5476 /* Matrix for Dirichlet problem is pcis->A_II */ 5477 n_D = pcis->n - pcis->n_B; 5478 opts = PETSC_FALSE; 5479 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5480 opts = PETSC_TRUE; 5481 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D)); 5482 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1)); 5483 /* default */ 5484 PetscCall(KSPSetType(pcbddc->ksp_D,KSPPREONLY)); 5485 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix)); 5486 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij)); 5487 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5488 if (issbaij) { 5489 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5490 } else { 5491 PetscCall(PCSetType(pc_temp,PCLU)); 5492 } 5493 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure)); 5494 } 5495 PetscCall(MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix)); 5496 PetscCall(KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II)); 5497 /* Allow user's customization */ 5498 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5499 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5500 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5501 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II)); 5502 } 5503 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5504 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5505 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5506 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5507 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5508 const PetscInt *idxs; 5509 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5510 5511 PetscCall(ISGetLocalSize(pcis->is_I_local,&nl)); 5512 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 5513 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5514 for (i=0;i<nl;i++) { 5515 for (d=0;d<cdim;d++) { 5516 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5517 } 5518 } 5519 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 5520 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5521 PetscCall(PetscFree(scoords)); 5522 } 5523 if (sub_schurs && sub_schurs->reuse_solver) { 5524 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5525 5526 PetscCall(KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver)); 5527 } 5528 5529 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5530 if (!n_D) { 5531 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5532 PetscCall(PCSetType(pc_temp,PCNONE)); 5533 } 5534 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5535 /* set ksp_D into pcis data */ 5536 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5537 PetscCall(KSPDestroy(&pcis->ksp_D)); 5538 pcis->ksp_D = pcbddc->ksp_D; 5539 } 5540 5541 /* NEUMANN PROBLEM */ 5542 A_RR = NULL; 5543 if (neumann) { 5544 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5545 PetscInt ibs,mbs; 5546 PetscBool issbaij, reuse_neumann_solver,isset,issym; 5547 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5548 5549 reuse_neumann_solver = PETSC_FALSE; 5550 if (sub_schurs && sub_schurs->reuse_solver) { 5551 IS iP; 5552 5553 reuse_neumann_solver = PETSC_TRUE; 5554 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP)); 5555 if (iP) reuse_neumann_solver = PETSC_FALSE; 5556 } 5557 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5558 PetscCall(ISGetSize(pcbddc->is_R_local,&n_R)); 5559 if (pcbddc->ksp_R) { /* already created ksp */ 5560 PetscInt nn_R; 5561 PetscCall(KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR)); 5562 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5563 PetscCall(MatGetSize(A_RR,&nn_R,NULL)); 5564 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5565 PetscCall(KSPReset(pcbddc->ksp_R)); 5566 PetscCall(MatDestroy(&A_RR)); 5567 reuse = MAT_INITIAL_MATRIX; 5568 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5569 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5570 PetscCall(MatDestroy(&A_RR)); 5571 reuse = MAT_INITIAL_MATRIX; 5572 } else { /* safe to reuse the matrix */ 5573 reuse = MAT_REUSE_MATRIX; 5574 } 5575 } 5576 /* last check */ 5577 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5578 PetscCall(MatDestroy(&A_RR)); 5579 reuse = MAT_INITIAL_MATRIX; 5580 } 5581 } else { /* first time, so we need to create the matrix */ 5582 reuse = MAT_INITIAL_MATRIX; 5583 } 5584 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5585 TODO: Get Rid of these conversions */ 5586 PetscCall(MatGetBlockSize(pcbddc->local_mat,&mbs)); 5587 PetscCall(ISGetBlockSize(pcbddc->is_R_local,&ibs)); 5588 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij)); 5589 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5590 if (matis->A == pcbddc->local_mat) { 5591 PetscCall(MatDestroy(&pcbddc->local_mat)); 5592 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5593 } else { 5594 PetscCall(MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5595 } 5596 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5597 if (matis->A == pcbddc->local_mat) { 5598 PetscCall(MatDestroy(&pcbddc->local_mat)); 5599 PetscCall(MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5600 } else { 5601 PetscCall(MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5602 } 5603 } 5604 /* extract A_RR */ 5605 if (reuse_neumann_solver) { 5606 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5607 5608 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5609 PetscCall(MatDestroy(&A_RR)); 5610 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5611 PetscCall(PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR)); 5612 } else { 5613 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR)); 5614 } 5615 } else { 5616 PetscCall(MatDestroy(&A_RR)); 5617 PetscCall(PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL)); 5618 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5619 } 5620 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5621 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR)); 5622 } 5623 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat,&isset,&issym)); 5624 if (isset) PetscCall(MatSetOption(A_RR,MAT_SYMMETRIC,issym)); 5625 opts = PETSC_FALSE; 5626 if (!pcbddc->ksp_R) { /* create object if not present */ 5627 opts = PETSC_TRUE; 5628 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R)); 5629 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1)); 5630 /* default */ 5631 PetscCall(KSPSetType(pcbddc->ksp_R,KSPPREONLY)); 5632 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix)); 5633 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5634 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij)); 5635 if (issbaij) { 5636 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5637 } else { 5638 PetscCall(PCSetType(pc_temp,PCLU)); 5639 } 5640 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure)); 5641 } 5642 PetscCall(KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR)); 5643 PetscCall(MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix)); 5644 if (opts) { /* Allow user's customization once */ 5645 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5646 } 5647 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5648 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5649 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR)); 5650 } 5651 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5652 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5653 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5654 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5655 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5656 const PetscInt *idxs; 5657 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5658 5659 PetscCall(ISGetLocalSize(pcbddc->is_R_local,&nl)); 5660 PetscCall(ISGetIndices(pcbddc->is_R_local,&idxs)); 5661 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5662 for (i=0;i<nl;i++) { 5663 for (d=0;d<cdim;d++) { 5664 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5665 } 5666 } 5667 PetscCall(ISRestoreIndices(pcbddc->is_R_local,&idxs)); 5668 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5669 PetscCall(PetscFree(scoords)); 5670 } 5671 5672 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5673 if (!n_R) { 5674 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5675 PetscCall(PCSetType(pc_temp,PCNONE)); 5676 } 5677 /* Reuse solver if it is present */ 5678 if (reuse_neumann_solver) { 5679 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5680 5681 PetscCall(KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver)); 5682 } 5683 PetscCall(KSPSetUp(pcbddc->ksp_R)); 5684 } 5685 5686 if (pcbddc->dbg_flag) { 5687 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5688 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5689 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5690 } 5691 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5692 5693 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5694 if (pcbddc->NullSpace_corr[0]) { 5695 PetscCall(PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE)); 5696 } 5697 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5698 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1])); 5699 } 5700 if (neumann && pcbddc->NullSpace_corr[2]) { 5701 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3])); 5702 } 5703 /* check Dirichlet and Neumann solvers */ 5704 if (pcbddc->dbg_flag) { 5705 if (dirichlet) { /* Dirichlet */ 5706 PetscCall(VecSetRandom(pcis->vec1_D,NULL)); 5707 PetscCall(MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D)); 5708 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D)); 5709 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 5710 PetscCall(VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D)); 5711 PetscCall(VecNorm(pcis->vec1_D,NORM_INFINITY,&value)); 5712 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,(double)value)); 5713 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5714 } 5715 if (neumann) { /* Neumann */ 5716 PetscCall(VecSetRandom(pcbddc->vec1_R,NULL)); 5717 PetscCall(MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R)); 5718 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R)); 5719 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 5720 PetscCall(VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R)); 5721 PetscCall(VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value)); 5722 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,(double)value)); 5723 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5724 } 5725 } 5726 /* free Neumann problem's matrix */ 5727 PetscCall(MatDestroy(&A_RR)); 5728 PetscFunctionReturn(0); 5729 } 5730 5731 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5732 { 5733 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5734 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5735 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5736 5737 PetscFunctionBegin; 5738 if (!reuse_solver) { 5739 PetscCall(VecSet(pcbddc->vec1_R,0.)); 5740 } 5741 if (!pcbddc->switch_static) { 5742 if (applytranspose && pcbddc->local_auxmat1) { 5743 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C)); 5744 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5745 } 5746 if (!reuse_solver) { 5747 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5748 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5749 } else { 5750 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5751 5752 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5753 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5754 } 5755 } else { 5756 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5757 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5758 PetscCall(VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5759 PetscCall(VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5760 if (applytranspose && pcbddc->local_auxmat1) { 5761 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C)); 5762 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5763 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5764 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5765 } 5766 } 5767 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5768 if (!reuse_solver || pcbddc->switch_static) { 5769 if (applytranspose) { 5770 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5771 } else { 5772 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5773 } 5774 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R)); 5775 } else { 5776 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5777 5778 if (applytranspose) { 5779 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5780 } else { 5781 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5782 } 5783 } 5784 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5785 PetscCall(VecSet(inout_B,0.)); 5786 if (!pcbddc->switch_static) { 5787 if (!reuse_solver) { 5788 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5789 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5790 } else { 5791 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5792 5793 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5794 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5795 } 5796 if (!applytranspose && pcbddc->local_auxmat1) { 5797 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5798 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B)); 5799 } 5800 } else { 5801 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5802 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5803 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5804 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5805 if (!applytranspose && pcbddc->local_auxmat1) { 5806 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5807 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R)); 5808 } 5809 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5810 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5811 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5812 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5813 } 5814 PetscFunctionReturn(0); 5815 } 5816 5817 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5818 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5819 { 5820 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5821 PC_IS* pcis = (PC_IS*) (pc->data); 5822 const PetscScalar zero = 0.0; 5823 5824 PetscFunctionBegin; 5825 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5826 if (!pcbddc->benign_apply_coarse_only) { 5827 if (applytranspose) { 5828 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P)); 5829 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5830 } else { 5831 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P)); 5832 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5833 } 5834 } else { 5835 PetscCall(VecSet(pcbddc->vec1_P,zero)); 5836 } 5837 5838 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5839 if (pcbddc->benign_n) { 5840 PetscScalar *array; 5841 PetscInt j; 5842 5843 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5844 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5845 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5846 } 5847 5848 /* start communications from local primal nodes to rhs of coarse solver */ 5849 PetscCall(VecSet(pcbddc->coarse_vec,zero)); 5850 PetscCall(PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD)); 5851 PetscCall(PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD)); 5852 5853 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5854 if (pcbddc->coarse_ksp) { 5855 Mat coarse_mat; 5856 Vec rhs,sol; 5857 MatNullSpace nullsp; 5858 PetscBool isbddc = PETSC_FALSE; 5859 5860 if (pcbddc->benign_have_null) { 5861 PC coarse_pc; 5862 5863 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5864 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 5865 /* we need to propagate to coarser levels the need for a possible benign correction */ 5866 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5867 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5868 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5869 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5870 } 5871 } 5872 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&rhs)); 5873 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&sol)); 5874 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 5875 if (applytranspose) { 5876 PetscCheck(!pcbddc->benign_apply_coarse_only,PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5877 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5878 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol)); 5879 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5880 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5881 PetscCall(MatGetTransposeNullSpace(coarse_mat,&nullsp)); 5882 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp,sol)); 5883 } else { 5884 PetscCall(MatGetNullSpace(coarse_mat,&nullsp)); 5885 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5886 PC coarse_pc; 5887 5888 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp,rhs)); 5889 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5890 PetscCall(PCPreSolve(coarse_pc,pcbddc->coarse_ksp)); 5891 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol)); 5892 PetscCall(PCPostSolve(coarse_pc,pcbddc->coarse_ksp)); 5893 } else { 5894 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5895 PetscCall(KSPSolve(pcbddc->coarse_ksp,rhs,sol)); 5896 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5897 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5898 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp,sol)); 5899 } 5900 } 5901 /* we don't need the benign correction at coarser levels anymore */ 5902 if (pcbddc->benign_have_null && isbddc) { 5903 PC coarse_pc; 5904 PC_BDDC* coarsepcbddc; 5905 5906 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5907 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5908 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5909 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5910 } 5911 } 5912 5913 /* Local solution on R nodes */ 5914 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5915 PetscCall(PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose)); 5916 } 5917 /* communications from coarse sol to local primal nodes */ 5918 PetscCall(PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE)); 5919 PetscCall(PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE)); 5920 5921 /* Sum contributions from the two levels */ 5922 if (!pcbddc->benign_apply_coarse_only) { 5923 if (applytranspose) { 5924 PetscCall(MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5925 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5926 } else { 5927 PetscCall(MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5928 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5929 } 5930 /* store p0 */ 5931 if (pcbddc->benign_n) { 5932 PetscScalar *array; 5933 PetscInt j; 5934 5935 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5936 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5937 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5938 } 5939 } else { /* expand the coarse solution */ 5940 if (applytranspose) { 5941 PetscCall(MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B)); 5942 } else { 5943 PetscCall(MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B)); 5944 } 5945 } 5946 PetscFunctionReturn(0); 5947 } 5948 5949 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5950 { 5951 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5952 Vec from,to; 5953 const PetscScalar *array; 5954 5955 PetscFunctionBegin; 5956 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5957 from = pcbddc->coarse_vec; 5958 to = pcbddc->vec1_P; 5959 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5960 Vec tvec; 5961 5962 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 5963 PetscCall(VecResetArray(tvec)); 5964 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&tvec)); 5965 PetscCall(VecGetArrayRead(tvec,&array)); 5966 PetscCall(VecPlaceArray(from,array)); 5967 PetscCall(VecRestoreArrayRead(tvec,&array)); 5968 } 5969 } else { /* from local to global -> put data in coarse right hand side */ 5970 from = pcbddc->vec1_P; 5971 to = pcbddc->coarse_vec; 5972 } 5973 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 5974 PetscFunctionReturn(0); 5975 } 5976 5977 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5978 { 5979 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5980 Vec from,to; 5981 const PetscScalar *array; 5982 5983 PetscFunctionBegin; 5984 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5985 from = pcbddc->coarse_vec; 5986 to = pcbddc->vec1_P; 5987 } else { /* from local to global -> put data in coarse right hand side */ 5988 from = pcbddc->vec1_P; 5989 to = pcbddc->coarse_vec; 5990 } 5991 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 5992 if (smode == SCATTER_FORWARD) { 5993 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5994 Vec tvec; 5995 5996 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 5997 PetscCall(VecGetArrayRead(to,&array)); 5998 PetscCall(VecPlaceArray(tvec,array)); 5999 PetscCall(VecRestoreArrayRead(to,&array)); 6000 } 6001 } else { 6002 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6003 PetscCall(VecResetArray(from)); 6004 } 6005 } 6006 PetscFunctionReturn(0); 6007 } 6008 6009 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6010 { 6011 PC_IS* pcis = (PC_IS*)(pc->data); 6012 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6013 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6014 /* one and zero */ 6015 PetscScalar one=1.0,zero=0.0; 6016 /* space to store constraints and their local indices */ 6017 PetscScalar *constraints_data; 6018 PetscInt *constraints_idxs,*constraints_idxs_B; 6019 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6020 PetscInt *constraints_n; 6021 /* iterators */ 6022 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6023 /* BLAS integers */ 6024 PetscBLASInt lwork,lierr; 6025 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6026 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6027 /* reuse */ 6028 PetscInt olocal_primal_size,olocal_primal_size_cc; 6029 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6030 /* change of basis */ 6031 PetscBool qr_needed; 6032 PetscBT change_basis,qr_needed_idx; 6033 /* auxiliary stuff */ 6034 PetscInt *nnz,*is_indices; 6035 PetscInt ncc; 6036 /* some quantities */ 6037 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6038 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6039 PetscReal tol; /* tolerance for retaining eigenmodes */ 6040 6041 PetscFunctionBegin; 6042 tol = PetscSqrtReal(PETSC_SMALL); 6043 /* Destroy Mat objects computed previously */ 6044 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6045 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6046 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6047 /* save info on constraints from previous setup (if any) */ 6048 olocal_primal_size = pcbddc->local_primal_size; 6049 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6050 PetscCall(PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult)); 6051 PetscCall(PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc)); 6052 PetscCall(PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc)); 6053 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 6054 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6055 6056 if (!pcbddc->adaptive_selection) { 6057 IS ISForVertices,*ISForFaces,*ISForEdges; 6058 MatNullSpace nearnullsp; 6059 const Vec *nearnullvecs; 6060 Vec *localnearnullsp; 6061 PetscScalar *array; 6062 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size,o_nf,o_ne; 6063 PetscBool nnsp_has_cnst; 6064 /* LAPACK working arrays for SVD or POD */ 6065 PetscBool skip_lapack,boolforchange; 6066 PetscScalar *work; 6067 PetscReal *singular_vals; 6068 #if defined(PETSC_USE_COMPLEX) 6069 PetscReal *rwork; 6070 #endif 6071 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6072 PetscBLASInt dummy_int=1; 6073 PetscScalar dummy_scalar=1.; 6074 PetscBool use_pod = PETSC_FALSE; 6075 6076 /* MKL SVD with same input gives different results on different processes! */ 6077 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6078 use_pod = PETSC_TRUE; 6079 #endif 6080 /* Get index sets for faces, edges and vertices from graph */ 6081 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices)); 6082 o_nf = n_ISForFaces; 6083 o_ne = n_ISForEdges; 6084 n_vertices = 0; 6085 if (ISForVertices) PetscCall(ISGetSize(ISForVertices,&n_vertices)); 6086 /* print some info */ 6087 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6088 6089 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 6090 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 6091 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6092 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6093 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n",PetscGlobalRank,n_vertices,pcbddc->use_vertices)); 6094 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges)); 6095 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces)); 6096 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6097 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6098 } 6099 6100 if (!pcbddc->use_vertices) n_vertices = 0; 6101 if (!pcbddc->use_edges) n_ISForEdges = 0; 6102 if (!pcbddc->use_faces) n_ISForFaces = 0; 6103 6104 /* check if near null space is attached to global mat */ 6105 if (pcbddc->use_nnsp) { 6106 PetscCall(MatGetNearNullSpace(pc->pmat,&nearnullsp)); 6107 } else nearnullsp = NULL; 6108 6109 if (nearnullsp) { 6110 PetscCall(MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs)); 6111 /* remove any stored info */ 6112 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6113 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6114 /* store information for BDDC solver reuse */ 6115 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6116 pcbddc->onearnullspace = nearnullsp; 6117 PetscCall(PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state)); 6118 for (i=0;i<nnsp_size;i++) { 6119 PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i])); 6120 } 6121 } else { /* if near null space is not provided BDDC uses constants by default */ 6122 nnsp_size = 0; 6123 nnsp_has_cnst = PETSC_TRUE; 6124 } 6125 /* get max number of constraints on a single cc */ 6126 max_constraints = nnsp_size; 6127 if (nnsp_has_cnst) max_constraints++; 6128 6129 /* 6130 Evaluate maximum storage size needed by the procedure 6131 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6132 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6133 There can be multiple constraints per connected component 6134 */ 6135 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6136 PetscCall(PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n)); 6137 6138 total_counts = n_ISForFaces+n_ISForEdges; 6139 total_counts *= max_constraints; 6140 total_counts += n_vertices; 6141 PetscCall(PetscBTCreate(total_counts,&change_basis)); 6142 6143 total_counts = 0; 6144 max_size_of_constraint = 0; 6145 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6146 IS used_is; 6147 if (i<n_ISForEdges) { 6148 used_is = ISForEdges[i]; 6149 } else { 6150 used_is = ISForFaces[i-n_ISForEdges]; 6151 } 6152 PetscCall(ISGetSize(used_is,&j)); 6153 total_counts += j; 6154 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6155 } 6156 PetscCall(PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B)); 6157 6158 /* get local part of global near null space vectors */ 6159 PetscCall(PetscMalloc1(nnsp_size,&localnearnullsp)); 6160 for (k=0;k<nnsp_size;k++) { 6161 PetscCall(VecDuplicate(pcis->vec1_N,&localnearnullsp[k])); 6162 PetscCall(VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6163 PetscCall(VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6164 } 6165 6166 /* whether or not to skip lapack calls */ 6167 skip_lapack = PETSC_TRUE; 6168 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6169 6170 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6171 if (!skip_lapack) { 6172 PetscScalar temp_work; 6173 6174 if (use_pod) { 6175 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6176 PetscCall(PetscMalloc1(max_constraints*max_constraints,&correlation_mat)); 6177 PetscCall(PetscMalloc1(max_constraints,&singular_vals)); 6178 PetscCall(PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis)); 6179 #if defined(PETSC_USE_COMPLEX) 6180 PetscCall(PetscMalloc1(3*max_constraints,&rwork)); 6181 #endif 6182 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6183 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6184 PetscCall(PetscBLASIntCast(max_constraints,&Blas_LDA)); 6185 lwork = -1; 6186 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6187 #if !defined(PETSC_USE_COMPLEX) 6188 PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6189 #else 6190 PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6191 #endif 6192 PetscCall(PetscFPTrapPop()); 6193 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6194 } else { 6195 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6196 /* SVD */ 6197 PetscInt max_n,min_n; 6198 max_n = max_size_of_constraint; 6199 min_n = max_constraints; 6200 if (max_size_of_constraint < max_constraints) { 6201 min_n = max_size_of_constraint; 6202 max_n = max_constraints; 6203 } 6204 PetscCall(PetscMalloc1(min_n,&singular_vals)); 6205 #if defined(PETSC_USE_COMPLEX) 6206 PetscCall(PetscMalloc1(5*min_n,&rwork)); 6207 #endif 6208 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6209 lwork = -1; 6210 PetscCall(PetscBLASIntCast(max_n,&Blas_M)); 6211 PetscCall(PetscBLASIntCast(min_n,&Blas_N)); 6212 PetscCall(PetscBLASIntCast(max_n,&Blas_LDA)); 6213 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6214 #if !defined(PETSC_USE_COMPLEX) 6215 PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6216 #else 6217 PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6218 #endif 6219 PetscCall(PetscFPTrapPop()); 6220 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6221 #else 6222 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6223 #endif /* on missing GESVD */ 6224 } 6225 /* Allocate optimal workspace */ 6226 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork)); 6227 PetscCall(PetscMalloc1(lwork,&work)); 6228 } 6229 /* Now we can loop on constraining sets */ 6230 total_counts = 0; 6231 constraints_idxs_ptr[0] = 0; 6232 constraints_data_ptr[0] = 0; 6233 /* vertices */ 6234 if (n_vertices) { 6235 PetscCall(ISGetIndices(ISForVertices,(const PetscInt**)&is_indices)); 6236 PetscCall(PetscArraycpy(constraints_idxs,is_indices,n_vertices)); 6237 for (i=0;i<n_vertices;i++) { 6238 constraints_n[total_counts] = 1; 6239 constraints_data[total_counts] = 1.0; 6240 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6241 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6242 total_counts++; 6243 } 6244 PetscCall(ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices)); 6245 } 6246 6247 /* edges and faces */ 6248 total_counts_cc = total_counts; 6249 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6250 IS used_is; 6251 PetscBool idxs_copied = PETSC_FALSE; 6252 6253 if (ncc<n_ISForEdges) { 6254 used_is = ISForEdges[ncc]; 6255 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6256 } else { 6257 used_is = ISForFaces[ncc-n_ISForEdges]; 6258 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6259 } 6260 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6261 6262 PetscCall(ISGetSize(used_is,&size_of_constraint)); 6263 if (!size_of_constraint) continue; 6264 PetscCall(ISGetIndices(used_is,(const PetscInt**)&is_indices)); 6265 /* change of basis should not be performed on local periodic nodes */ 6266 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6267 if (nnsp_has_cnst) { 6268 PetscScalar quad_value; 6269 6270 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6271 idxs_copied = PETSC_TRUE; 6272 6273 if (!pcbddc->use_nnsp_true) { 6274 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6275 } else { 6276 quad_value = 1.0; 6277 } 6278 for (j=0;j<size_of_constraint;j++) { 6279 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6280 } 6281 temp_constraints++; 6282 total_counts++; 6283 } 6284 for (k=0;k<nnsp_size;k++) { 6285 PetscReal real_value; 6286 PetscScalar *ptr_to_data; 6287 6288 PetscCall(VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6289 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6290 for (j=0;j<size_of_constraint;j++) { 6291 ptr_to_data[j] = array[is_indices[j]]; 6292 } 6293 PetscCall(VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6294 /* check if array is null on the connected component */ 6295 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6296 PetscCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6297 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6298 temp_constraints++; 6299 total_counts++; 6300 if (!idxs_copied) { 6301 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6302 idxs_copied = PETSC_TRUE; 6303 } 6304 } 6305 } 6306 PetscCall(ISRestoreIndices(used_is,(const PetscInt**)&is_indices)); 6307 valid_constraints = temp_constraints; 6308 if (!pcbddc->use_nnsp_true && temp_constraints) { 6309 if (temp_constraints == 1) { /* just normalize the constraint */ 6310 PetscScalar norm,*ptr_to_data; 6311 6312 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6313 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6314 PetscCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6315 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6316 PetscCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6317 } else { /* perform SVD */ 6318 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6319 6320 if (use_pod) { 6321 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6322 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6323 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6324 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6325 from that computed using LAPACKgesvd 6326 -> This is due to a different computation of eigenvectors in LAPACKheev 6327 -> The quality of the POD-computed basis will be the same */ 6328 PetscCall(PetscArrayzero(correlation_mat,temp_constraints*temp_constraints)); 6329 /* Store upper triangular part of correlation matrix */ 6330 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6331 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6332 for (j=0;j<temp_constraints;j++) { 6333 for (k=0;k<j+1;k++) { 6334 PetscCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6335 } 6336 } 6337 /* compute eigenvalues and eigenvectors of correlation matrix */ 6338 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6339 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDA)); 6340 #if !defined(PETSC_USE_COMPLEX) 6341 PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6342 #else 6343 PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6344 #endif 6345 PetscCall(PetscFPTrapPop()); 6346 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6347 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6348 j = 0; 6349 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6350 total_counts = total_counts-j; 6351 valid_constraints = temp_constraints-j; 6352 /* scale and copy POD basis into used quadrature memory */ 6353 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6354 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6355 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_K)); 6356 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6357 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDB)); 6358 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6359 if (j<temp_constraints) { 6360 PetscInt ii; 6361 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6362 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6363 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6364 PetscCall(PetscFPTrapPop()); 6365 for (k=0;k<temp_constraints-j;k++) { 6366 for (ii=0;ii<size_of_constraint;ii++) { 6367 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6368 } 6369 } 6370 } 6371 } else { 6372 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6373 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6374 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6375 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6376 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6377 #if !defined(PETSC_USE_COMPLEX) 6378 PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6379 #else 6380 PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6381 #endif 6382 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6383 PetscCall(PetscFPTrapPop()); 6384 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6385 k = temp_constraints; 6386 if (k > size_of_constraint) k = size_of_constraint; 6387 j = 0; 6388 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6389 valid_constraints = k-j; 6390 total_counts = total_counts-temp_constraints+valid_constraints; 6391 #else 6392 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6393 #endif /* on missing GESVD */ 6394 } 6395 } 6396 } 6397 /* update pointers information */ 6398 if (valid_constraints) { 6399 constraints_n[total_counts_cc] = valid_constraints; 6400 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6401 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6402 /* set change_of_basis flag */ 6403 if (boolforchange) { 6404 PetscBTSet(change_basis,total_counts_cc); 6405 } 6406 total_counts_cc++; 6407 } 6408 } 6409 /* free workspace */ 6410 if (!skip_lapack) { 6411 PetscCall(PetscFree(work)); 6412 #if defined(PETSC_USE_COMPLEX) 6413 PetscCall(PetscFree(rwork)); 6414 #endif 6415 PetscCall(PetscFree(singular_vals)); 6416 PetscCall(PetscFree(correlation_mat)); 6417 PetscCall(PetscFree(temp_basis)); 6418 } 6419 for (k=0;k<nnsp_size;k++) { 6420 PetscCall(VecDestroy(&localnearnullsp[k])); 6421 } 6422 PetscCall(PetscFree(localnearnullsp)); 6423 /* free index sets of faces, edges and vertices */ 6424 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,&o_nf,&ISForFaces,&o_ne,&ISForEdges,&ISForVertices)); 6425 } else { 6426 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6427 6428 total_counts = 0; 6429 n_vertices = 0; 6430 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6431 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 6432 } 6433 max_constraints = 0; 6434 total_counts_cc = 0; 6435 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6436 total_counts += pcbddc->adaptive_constraints_n[i]; 6437 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6438 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6439 } 6440 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6441 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6442 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6443 constraints_data = pcbddc->adaptive_constraints_data; 6444 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6445 PetscCall(PetscMalloc1(total_counts_cc,&constraints_n)); 6446 total_counts_cc = 0; 6447 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6448 if (pcbddc->adaptive_constraints_n[i]) { 6449 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6450 } 6451 } 6452 6453 max_size_of_constraint = 0; 6454 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6455 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B)); 6456 /* Change of basis */ 6457 PetscCall(PetscBTCreate(total_counts_cc,&change_basis)); 6458 if (pcbddc->use_change_of_basis) { 6459 for (i=0;i<sub_schurs->n_subs;i++) { 6460 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6461 PetscCall(PetscBTSet(change_basis,i+n_vertices)); 6462 } 6463 } 6464 } 6465 } 6466 pcbddc->local_primal_size = total_counts; 6467 PetscCall(PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs)); 6468 6469 /* map constraints_idxs in boundary numbering */ 6470 if (pcbddc->use_change_of_basis) { 6471 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B)); 6472 PetscCheck(i == constraints_idxs_ptr[total_counts_cc],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT,constraints_idxs_ptr[total_counts_cc],i); 6473 } 6474 6475 /* Create constraint matrix */ 6476 PetscCall(MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix)); 6477 PetscCall(MatSetType(pcbddc->ConstraintMatrix,MATAIJ)); 6478 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n)); 6479 6480 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6481 /* determine if a QR strategy is needed for change of basis */ 6482 qr_needed = pcbddc->use_qr_single; 6483 PetscCall(PetscBTCreate(total_counts_cc,&qr_needed_idx)); 6484 total_primal_vertices=0; 6485 pcbddc->local_primal_size_cc = 0; 6486 for (i=0;i<total_counts_cc;i++) { 6487 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6488 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6489 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6490 pcbddc->local_primal_size_cc += 1; 6491 } else if (PetscBTLookup(change_basis,i)) { 6492 for (k=0;k<constraints_n[i];k++) { 6493 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6494 } 6495 pcbddc->local_primal_size_cc += constraints_n[i]; 6496 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6497 PetscBTSet(qr_needed_idx,i); 6498 qr_needed = PETSC_TRUE; 6499 } 6500 } else { 6501 pcbddc->local_primal_size_cc += 1; 6502 } 6503 } 6504 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6505 pcbddc->n_vertices = total_primal_vertices; 6506 /* permute indices in order to have a sorted set of vertices */ 6507 PetscCall(PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs)); 6508 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult)); 6509 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices)); 6510 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6511 6512 /* nonzero structure of constraint matrix */ 6513 /* and get reference dof for local constraints */ 6514 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&nnz)); 6515 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6516 6517 j = total_primal_vertices; 6518 total_counts = total_primal_vertices; 6519 cum = total_primal_vertices; 6520 for (i=n_vertices;i<total_counts_cc;i++) { 6521 if (!PetscBTLookup(change_basis,i)) { 6522 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6523 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6524 cum++; 6525 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6526 for (k=0;k<constraints_n[i];k++) { 6527 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6528 nnz[j+k] = size_of_constraint; 6529 } 6530 j += constraints_n[i]; 6531 } 6532 } 6533 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz)); 6534 PetscCall(MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6535 PetscCall(PetscFree(nnz)); 6536 6537 /* set values in constraint matrix */ 6538 for (i=0;i<total_primal_vertices;i++) { 6539 PetscCall(MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES)); 6540 } 6541 total_counts = total_primal_vertices; 6542 for (i=n_vertices;i<total_counts_cc;i++) { 6543 if (!PetscBTLookup(change_basis,i)) { 6544 PetscInt *cols; 6545 6546 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6547 cols = constraints_idxs+constraints_idxs_ptr[i]; 6548 for (k=0;k<constraints_n[i];k++) { 6549 PetscInt row = total_counts+k; 6550 PetscScalar *vals; 6551 6552 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6553 PetscCall(MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES)); 6554 } 6555 total_counts += constraints_n[i]; 6556 } 6557 } 6558 /* assembling */ 6559 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6560 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6561 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view")); 6562 6563 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6564 if (pcbddc->use_change_of_basis) { 6565 /* dual and primal dofs on a single cc */ 6566 PetscInt dual_dofs,primal_dofs; 6567 /* working stuff for GEQRF */ 6568 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6569 PetscBLASInt lqr_work; 6570 /* working stuff for UNGQR */ 6571 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6572 PetscBLASInt lgqr_work; 6573 /* working stuff for TRTRS */ 6574 PetscScalar *trs_rhs = NULL; 6575 PetscBLASInt Blas_NRHS; 6576 /* pointers for values insertion into change of basis matrix */ 6577 PetscInt *start_rows,*start_cols; 6578 PetscScalar *start_vals; 6579 /* working stuff for values insertion */ 6580 PetscBT is_primal; 6581 PetscInt *aux_primal_numbering_B; 6582 /* matrix sizes */ 6583 PetscInt global_size,local_size; 6584 /* temporary change of basis */ 6585 Mat localChangeOfBasisMatrix; 6586 /* extra space for debugging */ 6587 PetscScalar *dbg_work = NULL; 6588 6589 PetscCall(MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix)); 6590 PetscCall(MatSetType(localChangeOfBasisMatrix,MATAIJ)); 6591 PetscCall(MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n)); 6592 /* nonzeros for local mat */ 6593 PetscCall(PetscMalloc1(pcis->n,&nnz)); 6594 if (!pcbddc->benign_change || pcbddc->fake_change) { 6595 for (i=0;i<pcis->n;i++) nnz[i]=1; 6596 } else { 6597 const PetscInt *ii; 6598 PetscInt n; 6599 PetscBool flg_row; 6600 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6601 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6602 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6603 } 6604 for (i=n_vertices;i<total_counts_cc;i++) { 6605 if (PetscBTLookup(change_basis,i)) { 6606 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6607 if (PetscBTLookup(qr_needed_idx,i)) { 6608 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6609 } else { 6610 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6611 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6612 } 6613 } 6614 } 6615 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz)); 6616 PetscCall(MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6617 PetscCall(PetscFree(nnz)); 6618 /* Set interior change in the matrix */ 6619 if (!pcbddc->benign_change || pcbddc->fake_change) { 6620 for (i=0;i<pcis->n;i++) { 6621 PetscCall(MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES)); 6622 } 6623 } else { 6624 const PetscInt *ii,*jj; 6625 PetscScalar *aa; 6626 PetscInt n; 6627 PetscBool flg_row; 6628 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6629 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change,&aa)); 6630 for (i=0;i<n;i++) { 6631 PetscCall(MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES)); 6632 } 6633 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change,&aa)); 6634 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6635 } 6636 6637 if (pcbddc->dbg_flag) { 6638 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6639 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank)); 6640 } 6641 6642 /* Now we loop on the constraints which need a change of basis */ 6643 /* 6644 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6645 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6646 6647 Basic blocks of change of basis matrix T computed: 6648 6649 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6650 6651 | 1 0 ... 0 s_1/S | 6652 | 0 1 ... 0 s_2/S | 6653 | ... | 6654 | 0 ... 1 s_{n-1}/S | 6655 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6656 6657 with S = \sum_{i=1}^n s_i^2 6658 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6659 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6660 6661 - QR decomposition of constraints otherwise 6662 */ 6663 if (qr_needed && max_size_of_constraint) { 6664 /* space to store Q */ 6665 PetscCall(PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis)); 6666 /* array to store scaling factors for reflectors */ 6667 PetscCall(PetscMalloc1(max_constraints,&qr_tau)); 6668 /* first we issue queries for optimal work */ 6669 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6670 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6671 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6672 lqr_work = -1; 6673 PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6674 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6675 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work)); 6676 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work)); 6677 lgqr_work = -1; 6678 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6679 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_N)); 6680 PetscCall(PetscBLASIntCast(max_constraints,&Blas_K)); 6681 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6682 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6683 PetscCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6684 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6685 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work)); 6686 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work)); 6687 /* array to store rhs and solution of triangular solver */ 6688 PetscCall(PetscMalloc1(max_constraints*max_constraints,&trs_rhs)); 6689 /* allocating workspace for check */ 6690 if (pcbddc->dbg_flag) { 6691 PetscCall(PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work)); 6692 } 6693 } 6694 /* array to store whether a node is primal or not */ 6695 PetscCall(PetscBTCreate(pcis->n_B,&is_primal)); 6696 PetscCall(PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B)); 6697 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B)); 6698 PetscCheck(i == total_primal_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT,total_primal_vertices,i); 6699 for (i=0;i<total_primal_vertices;i++) { 6700 PetscCall(PetscBTSet(is_primal,aux_primal_numbering_B[i])); 6701 } 6702 PetscCall(PetscFree(aux_primal_numbering_B)); 6703 6704 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6705 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6706 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6707 if (PetscBTLookup(change_basis,total_counts)) { 6708 /* get constraint info */ 6709 primal_dofs = constraints_n[total_counts]; 6710 dual_dofs = size_of_constraint-primal_dofs; 6711 6712 if (pcbddc->dbg_flag) { 6713 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n",total_counts,primal_dofs,size_of_constraint)); 6714 } 6715 6716 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6717 6718 /* copy quadrature constraints for change of basis check */ 6719 if (pcbddc->dbg_flag) { 6720 PetscCall(PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6721 } 6722 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6723 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6724 6725 /* compute QR decomposition of constraints */ 6726 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6727 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6728 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6729 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6730 PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6731 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6732 PetscCall(PetscFPTrapPop()); 6733 6734 /* explicitly compute R^-T */ 6735 PetscCall(PetscArrayzero(trs_rhs,primal_dofs*primal_dofs)); 6736 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6737 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6738 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_NRHS)); 6739 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6740 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6741 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6742 PetscCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6743 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6744 PetscCall(PetscFPTrapPop()); 6745 6746 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6747 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6748 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6749 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6750 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6751 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6752 PetscCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6753 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6754 PetscCall(PetscFPTrapPop()); 6755 6756 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6757 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6758 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6759 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6760 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6761 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6762 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6763 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6764 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6765 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6766 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6767 PetscCall(PetscFPTrapPop()); 6768 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6769 6770 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6771 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6772 /* insert cols for primal dofs */ 6773 for (j=0;j<primal_dofs;j++) { 6774 start_vals = &qr_basis[j*size_of_constraint]; 6775 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6776 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6777 } 6778 /* insert cols for dual dofs */ 6779 for (j=0,k=0;j<dual_dofs;k++) { 6780 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6781 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6782 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6783 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6784 j++; 6785 } 6786 } 6787 6788 /* check change of basis */ 6789 if (pcbddc->dbg_flag) { 6790 PetscInt ii,jj; 6791 PetscBool valid_qr=PETSC_TRUE; 6792 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_M)); 6793 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6794 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_K)); 6795 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6796 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDB)); 6797 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDC)); 6798 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6799 PetscCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6800 PetscCall(PetscFPTrapPop()); 6801 for (jj=0;jj<size_of_constraint;jj++) { 6802 for (ii=0;ii<primal_dofs;ii++) { 6803 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6804 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6805 } 6806 } 6807 if (!valid_qr) { 6808 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n")); 6809 for (jj=0;jj<size_of_constraint;jj++) { 6810 for (ii=0;ii<primal_dofs;ii++) { 6811 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6812 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n",jj,ii,(double)PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6813 } 6814 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6815 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n",jj,ii,(double)PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6816 } 6817 } 6818 } 6819 } else { 6820 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n")); 6821 } 6822 } 6823 } else { /* simple transformation block */ 6824 PetscInt row,col; 6825 PetscScalar val,norm; 6826 6827 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6828 PetscCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6829 for (j=0;j<size_of_constraint;j++) { 6830 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6831 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6832 if (!PetscBTLookup(is_primal,row_B)) { 6833 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6834 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES)); 6835 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES)); 6836 } else { 6837 for (k=0;k<size_of_constraint;k++) { 6838 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6839 if (row != col) { 6840 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6841 } else { 6842 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6843 } 6844 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES)); 6845 } 6846 } 6847 } 6848 if (pcbddc->dbg_flag) { 6849 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n")); 6850 } 6851 } 6852 } else { 6853 if (pcbddc->dbg_flag) { 6854 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n",total_counts,size_of_constraint)); 6855 } 6856 } 6857 } 6858 6859 /* free workspace */ 6860 if (qr_needed) { 6861 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 6862 PetscCall(PetscFree(trs_rhs)); 6863 PetscCall(PetscFree(qr_tau)); 6864 PetscCall(PetscFree(qr_work)); 6865 PetscCall(PetscFree(gqr_work)); 6866 PetscCall(PetscFree(qr_basis)); 6867 } 6868 PetscCall(PetscBTDestroy(&is_primal)); 6869 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6870 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6871 6872 /* assembling of global change of variable */ 6873 if (!pcbddc->fake_change) { 6874 Mat tmat; 6875 PetscInt bs; 6876 6877 PetscCall(VecGetSize(pcis->vec1_global,&global_size)); 6878 PetscCall(VecGetLocalSize(pcis->vec1_global,&local_size)); 6879 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat)); 6880 PetscCall(MatISSetLocalMat(tmat,localChangeOfBasisMatrix)); 6881 PetscCall(MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY)); 6882 PetscCall(MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY)); 6883 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix)); 6884 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ)); 6885 PetscCall(MatGetBlockSize(pc->pmat,&bs)); 6886 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs)); 6887 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size)); 6888 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE)); 6889 PetscCall(MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix)); 6890 PetscCall(MatDestroy(&tmat)); 6891 PetscCall(VecSet(pcis->vec1_global,0.0)); 6892 PetscCall(VecSet(pcis->vec1_N,1.0)); 6893 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6894 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6895 PetscCall(VecReciprocal(pcis->vec1_global)); 6896 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL)); 6897 6898 /* check */ 6899 if (pcbddc->dbg_flag) { 6900 PetscReal error; 6901 Vec x,x_change; 6902 6903 PetscCall(VecDuplicate(pcis->vec1_global,&x)); 6904 PetscCall(VecDuplicate(pcis->vec1_global,&x_change)); 6905 PetscCall(VecSetRandom(x,NULL)); 6906 PetscCall(VecCopy(x,pcis->vec1_global)); 6907 PetscCall(VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6908 PetscCall(VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6909 PetscCall(MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N)); 6910 PetscCall(VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6911 PetscCall(VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6912 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change)); 6913 PetscCall(VecAXPY(x,-1.0,x_change)); 6914 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 6915 if (error > PETSC_SMALL) { 6916 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",(double)error); 6917 } 6918 PetscCall(VecDestroy(&x)); 6919 PetscCall(VecDestroy(&x_change)); 6920 } 6921 /* adapt sub_schurs computed (if any) */ 6922 if (pcbddc->use_deluxe_scaling) { 6923 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6924 6925 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6926 if (sub_schurs && sub_schurs->S_Ej_all) { 6927 Mat S_new,tmat; 6928 IS is_all_N,is_V_Sall = NULL; 6929 6930 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N)); 6931 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat)); 6932 if (pcbddc->deluxe_zerorows) { 6933 ISLocalToGlobalMapping NtoSall; 6934 IS is_V; 6935 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V)); 6936 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall)); 6937 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall)); 6938 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 6939 PetscCall(ISDestroy(&is_V)); 6940 } 6941 PetscCall(ISDestroy(&is_all_N)); 6942 PetscCall(MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 6943 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 6944 PetscCall(PetscObjectReference((PetscObject)S_new)); 6945 if (pcbddc->deluxe_zerorows) { 6946 const PetscScalar *array; 6947 const PetscInt *idxs_V,*idxs_all; 6948 PetscInt i,n_V; 6949 6950 PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 6951 PetscCall(ISGetLocalSize(is_V_Sall,&n_V)); 6952 PetscCall(ISGetIndices(is_V_Sall,&idxs_V)); 6953 PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs_all)); 6954 PetscCall(VecGetArrayRead(pcis->D,&array)); 6955 for (i=0;i<n_V;i++) { 6956 PetscScalar val; 6957 PetscInt idx; 6958 6959 idx = idxs_V[i]; 6960 val = array[idxs_all[idxs_V[i]]]; 6961 PetscCall(MatSetValue(S_new,idx,idx,val,INSERT_VALUES)); 6962 } 6963 PetscCall(MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY)); 6964 PetscCall(MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY)); 6965 PetscCall(VecRestoreArrayRead(pcis->D,&array)); 6966 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all)); 6967 PetscCall(ISRestoreIndices(is_V_Sall,&idxs_V)); 6968 } 6969 sub_schurs->S_Ej_all = S_new; 6970 PetscCall(MatDestroy(&S_new)); 6971 if (sub_schurs->sum_S_Ej_all) { 6972 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 6973 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 6974 PetscCall(PetscObjectReference((PetscObject)S_new)); 6975 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 6976 sub_schurs->sum_S_Ej_all = S_new; 6977 PetscCall(MatDestroy(&S_new)); 6978 } 6979 PetscCall(ISDestroy(&is_V_Sall)); 6980 PetscCall(MatDestroy(&tmat)); 6981 } 6982 /* destroy any change of basis context in sub_schurs */ 6983 if (sub_schurs && sub_schurs->change) { 6984 PetscInt i; 6985 6986 for (i=0;i<sub_schurs->n_subs;i++) { 6987 PetscCall(KSPDestroy(&sub_schurs->change[i])); 6988 } 6989 PetscCall(PetscFree(sub_schurs->change)); 6990 } 6991 } 6992 if (pcbddc->switch_static) { /* need to save the local change */ 6993 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6994 } else { 6995 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 6996 } 6997 /* determine if any process has changed the pressures locally */ 6998 pcbddc->change_interior = pcbddc->benign_have_null; 6999 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7000 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7001 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7002 pcbddc->use_qr_single = qr_needed; 7003 } 7004 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7005 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7006 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7007 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7008 } else { 7009 Mat benign_global = NULL; 7010 if (pcbddc->benign_have_null) { 7011 Mat M; 7012 7013 pcbddc->change_interior = PETSC_TRUE; 7014 PetscCall(VecCopy(matis->counter,pcis->vec1_N)); 7015 PetscCall(VecReciprocal(pcis->vec1_N)); 7016 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global)); 7017 if (pcbddc->benign_change) { 7018 PetscCall(MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M)); 7019 PetscCall(MatDiagonalScale(M,pcis->vec1_N,NULL)); 7020 } else { 7021 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M)); 7022 PetscCall(MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES)); 7023 } 7024 PetscCall(MatISSetLocalMat(benign_global,M)); 7025 PetscCall(MatDestroy(&M)); 7026 PetscCall(MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY)); 7027 PetscCall(MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY)); 7028 } 7029 if (pcbddc->user_ChangeOfBasisMatrix) { 7030 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix)); 7031 PetscCall(MatDestroy(&benign_global)); 7032 } else if (pcbddc->benign_have_null) { 7033 pcbddc->ChangeOfBasisMatrix = benign_global; 7034 } 7035 } 7036 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7037 IS is_global; 7038 const PetscInt *gidxs; 7039 7040 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping,&gidxs)); 7041 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global)); 7042 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping,&gidxs)); 7043 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change)); 7044 PetscCall(ISDestroy(&is_global)); 7045 } 7046 } 7047 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7048 PetscCall(VecDuplicate(pcis->vec1_global,&pcbddc->work_change)); 7049 } 7050 7051 if (!pcbddc->fake_change) { 7052 /* add pressure dofs to set of primal nodes for numbering purposes */ 7053 for (i=0;i<pcbddc->benign_n;i++) { 7054 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7055 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7056 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7057 pcbddc->local_primal_size_cc++; 7058 pcbddc->local_primal_size++; 7059 } 7060 7061 /* check if a new primal space has been introduced (also take into account benign trick) */ 7062 pcbddc->new_primal_space_local = PETSC_TRUE; 7063 if (olocal_primal_size == pcbddc->local_primal_size) { 7064 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7065 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7066 if (!pcbddc->new_primal_space_local) { 7067 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7068 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7069 } 7070 } 7071 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7072 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 7073 } 7074 PetscCall(PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult)); 7075 7076 /* flush dbg viewer */ 7077 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7078 7079 /* free workspace */ 7080 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7081 PetscCall(PetscBTDestroy(&change_basis)); 7082 if (!pcbddc->adaptive_selection) { 7083 PetscCall(PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n)); 7084 PetscCall(PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B)); 7085 } else { 7086 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n,pcbddc->adaptive_constraints_idxs_ptr,pcbddc->adaptive_constraints_data_ptr,pcbddc->adaptive_constraints_idxs,pcbddc->adaptive_constraints_data)); 7087 PetscCall(PetscFree(constraints_n)); 7088 PetscCall(PetscFree(constraints_idxs_B)); 7089 } 7090 PetscFunctionReturn(0); 7091 } 7092 7093 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7094 { 7095 ISLocalToGlobalMapping map; 7096 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7097 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7098 PetscInt i,N; 7099 PetscBool rcsr = PETSC_FALSE; 7100 7101 PetscFunctionBegin; 7102 if (pcbddc->recompute_topography) { 7103 pcbddc->graphanalyzed = PETSC_FALSE; 7104 /* Reset previously computed graph */ 7105 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7106 /* Init local Graph struct */ 7107 PetscCall(MatGetSize(pc->pmat,&N,NULL)); 7108 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&map,NULL)); 7109 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount)); 7110 7111 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7112 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local)); 7113 } 7114 /* Check validity of the csr graph passed in by the user */ 7115 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7116 7117 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7118 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7119 PetscInt *xadj,*adjncy; 7120 PetscInt nvtxs; 7121 PetscBool flg_row=PETSC_FALSE; 7122 7123 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7124 if (flg_row) { 7125 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES)); 7126 pcbddc->computed_rowadj = PETSC_TRUE; 7127 } 7128 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7129 rcsr = PETSC_TRUE; 7130 } 7131 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7132 7133 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7134 PetscReal *lcoords; 7135 PetscInt n; 7136 MPI_Datatype dimrealtype; 7137 7138 /* TODO: support for blocked */ 7139 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7140 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7141 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords)); 7142 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype)); 7143 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7144 PetscCall(PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7145 PetscCall(PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7146 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7147 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7148 7149 pcbddc->mat_graph->coords = lcoords; 7150 pcbddc->mat_graph->cloc = PETSC_TRUE; 7151 pcbddc->mat_graph->cnloc = n; 7152 } 7153 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7154 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7155 7156 /* Setup of Graph */ 7157 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7158 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local)); 7159 7160 /* attach info on disconnected subdomains if present */ 7161 if (pcbddc->n_local_subs) { 7162 PetscInt *local_subs,n,totn; 7163 7164 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7165 PetscCall(PetscMalloc1(n,&local_subs)); 7166 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7167 for (i=0;i<pcbddc->n_local_subs;i++) { 7168 const PetscInt *idxs; 7169 PetscInt nl,j; 7170 7171 PetscCall(ISGetLocalSize(pcbddc->local_subs[i],&nl)); 7172 PetscCall(ISGetIndices(pcbddc->local_subs[i],&idxs)); 7173 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7174 PetscCall(ISRestoreIndices(pcbddc->local_subs[i],&idxs)); 7175 } 7176 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7177 pcbddc->mat_graph->n_local_subs = totn + 1; 7178 pcbddc->mat_graph->local_subs = local_subs; 7179 } 7180 } 7181 7182 if (!pcbddc->graphanalyzed) { 7183 /* Graph's connected components analysis */ 7184 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7185 pcbddc->graphanalyzed = PETSC_TRUE; 7186 pcbddc->corner_selected = pcbddc->corner_selection; 7187 } 7188 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7189 PetscFunctionReturn(0); 7190 } 7191 7192 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7193 { 7194 PetscInt i,j,n; 7195 PetscScalar *alphas; 7196 PetscReal norm,*onorms; 7197 7198 PetscFunctionBegin; 7199 n = *nio; 7200 if (!n) PetscFunctionReturn(0); 7201 PetscCall(PetscMalloc2(n,&alphas,n,&onorms)); 7202 PetscCall(VecNormalize(vecs[0],&norm)); 7203 if (norm < PETSC_SMALL) { 7204 onorms[0] = 0.0; 7205 PetscCall(VecSet(vecs[0],0.0)); 7206 } else { 7207 onorms[0] = norm; 7208 } 7209 7210 for (i=1;i<n;i++) { 7211 PetscCall(VecMDot(vecs[i],i,vecs,alphas)); 7212 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7213 PetscCall(VecMAXPY(vecs[i],i,alphas,vecs)); 7214 PetscCall(VecNormalize(vecs[i],&norm)); 7215 if (norm < PETSC_SMALL) { 7216 onorms[i] = 0.0; 7217 PetscCall(VecSet(vecs[i],0.0)); 7218 } else { 7219 onorms[i] = norm; 7220 } 7221 } 7222 /* push nonzero vectors at the beginning */ 7223 for (i=0;i<n;i++) { 7224 if (onorms[i] == 0.0) { 7225 for (j=i+1;j<n;j++) { 7226 if (onorms[j] != 0.0) { 7227 PetscCall(VecCopy(vecs[j],vecs[i])); 7228 onorms[j] = 0.0; 7229 } 7230 } 7231 } 7232 } 7233 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7234 PetscCall(PetscFree2(alphas,onorms)); 7235 PetscFunctionReturn(0); 7236 } 7237 7238 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7239 { 7240 ISLocalToGlobalMapping mapping; 7241 Mat A; 7242 PetscInt n_neighs,*neighs,*n_shared,**shared; 7243 PetscMPIInt size,rank,color; 7244 PetscInt *xadj,*adjncy; 7245 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7246 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7247 PetscInt void_procs,*procs_candidates = NULL; 7248 PetscInt xadj_count,*count; 7249 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7250 PetscSubcomm psubcomm; 7251 MPI_Comm subcomm; 7252 7253 PetscFunctionBegin; 7254 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7255 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7256 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7257 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7258 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7259 PetscCheck(*n_subdomains >0,PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %" PetscInt_FMT,*n_subdomains); 7260 7261 if (have_void) *have_void = PETSC_FALSE; 7262 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size)); 7263 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 7264 PetscCall(MatISGetLocalMat(mat,&A)); 7265 PetscCall(MatGetLocalSize(A,&n,NULL)); 7266 im_active = !!n; 7267 PetscCall(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat))); 7268 void_procs = size - active_procs; 7269 /* get ranks of of non-active processes in mat communicator */ 7270 if (void_procs) { 7271 PetscInt ncand; 7272 7273 if (have_void) *have_void = PETSC_TRUE; 7274 PetscCall(PetscMalloc1(size,&procs_candidates)); 7275 PetscCallMPI(MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 7276 for (i=0,ncand=0;i<size;i++) { 7277 if (!procs_candidates[i]) { 7278 procs_candidates[ncand++] = i; 7279 } 7280 } 7281 /* force n_subdomains to be not greater that the number of non-active processes */ 7282 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7283 } 7284 7285 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7286 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7287 PetscCall(MatGetSize(mat,&N,NULL)); 7288 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7289 PetscInt issize,isidx,dest; 7290 if (*n_subdomains == 1) dest = 0; 7291 else dest = rank; 7292 if (im_active) { 7293 issize = 1; 7294 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7295 isidx = procs_candidates[dest]; 7296 } else { 7297 isidx = dest; 7298 } 7299 } else { 7300 issize = 0; 7301 isidx = -1; 7302 } 7303 if (*n_subdomains != 1) *n_subdomains = active_procs; 7304 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends)); 7305 PetscCall(PetscFree(procs_candidates)); 7306 PetscFunctionReturn(0); 7307 } 7308 PetscCall(PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL)); 7309 PetscCall(PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL)); 7310 threshold = PetscMax(threshold,2); 7311 7312 /* Get info on mapping */ 7313 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7314 PetscCall(ISLocalToGlobalMappingGetInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7315 7316 /* build local CSR graph of subdomains' connectivity */ 7317 PetscCall(PetscMalloc1(2,&xadj)); 7318 xadj[0] = 0; 7319 xadj[1] = PetscMax(n_neighs-1,0); 7320 PetscCall(PetscMalloc1(xadj[1],&adjncy)); 7321 PetscCall(PetscMalloc1(xadj[1],&adjncy_wgt)); 7322 PetscCall(PetscCalloc1(n,&count)); 7323 for (i=1;i<n_neighs;i++) 7324 for (j=0;j<n_shared[i];j++) 7325 count[shared[i][j]] += 1; 7326 7327 xadj_count = 0; 7328 for (i=1;i<n_neighs;i++) { 7329 for (j=0;j<n_shared[i];j++) { 7330 if (count[shared[i][j]] < threshold) { 7331 adjncy[xadj_count] = neighs[i]; 7332 adjncy_wgt[xadj_count] = n_shared[i]; 7333 xadj_count++; 7334 break; 7335 } 7336 } 7337 } 7338 xadj[1] = xadj_count; 7339 PetscCall(PetscFree(count)); 7340 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7341 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7342 7343 PetscCall(PetscMalloc1(1,&ranks_send_to_idx)); 7344 7345 /* Restrict work on active processes only */ 7346 PetscCall(PetscMPIIntCast(im_active,&color)); 7347 if (void_procs) { 7348 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm)); 7349 PetscCall(PetscSubcommSetNumber(psubcomm,2)); /* 2 groups, active process and not active processes */ 7350 PetscCall(PetscSubcommSetTypeGeneral(psubcomm,color,rank)); 7351 subcomm = PetscSubcommChild(psubcomm); 7352 } else { 7353 psubcomm = NULL; 7354 subcomm = PetscObjectComm((PetscObject)mat); 7355 } 7356 7357 v_wgt = NULL; 7358 if (!color) { 7359 PetscCall(PetscFree(xadj)); 7360 PetscCall(PetscFree(adjncy)); 7361 PetscCall(PetscFree(adjncy_wgt)); 7362 } else { 7363 Mat subdomain_adj; 7364 IS new_ranks,new_ranks_contig; 7365 MatPartitioning partitioner; 7366 PetscInt rstart=0,rend=0; 7367 PetscInt *is_indices,*oldranks; 7368 PetscMPIInt size; 7369 PetscBool aggregate; 7370 7371 PetscCallMPI(MPI_Comm_size(subcomm,&size)); 7372 if (void_procs) { 7373 PetscInt prank = rank; 7374 PetscCall(PetscMalloc1(size,&oldranks)); 7375 PetscCallMPI(MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm)); 7376 for (i=0;i<xadj[1];i++) { 7377 PetscCall(PetscFindInt(adjncy[i],size,oldranks,&adjncy[i])); 7378 } 7379 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7380 } else { 7381 oldranks = NULL; 7382 } 7383 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7384 if (aggregate) { /* TODO: all this part could be made more efficient */ 7385 PetscInt lrows,row,ncols,*cols; 7386 PetscMPIInt nrank; 7387 PetscScalar *vals; 7388 7389 PetscCallMPI(MPI_Comm_rank(subcomm,&nrank)); 7390 lrows = 0; 7391 if (nrank<redprocs) { 7392 lrows = size/redprocs; 7393 if (nrank<size%redprocs) lrows++; 7394 } 7395 PetscCall(MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj)); 7396 PetscCall(MatGetOwnershipRange(subdomain_adj,&rstart,&rend)); 7397 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 7398 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 7399 row = nrank; 7400 ncols = xadj[1]-xadj[0]; 7401 cols = adjncy; 7402 PetscCall(PetscMalloc1(ncols,&vals)); 7403 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7404 PetscCall(MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES)); 7405 PetscCall(MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7406 PetscCall(MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7407 PetscCall(PetscFree(xadj)); 7408 PetscCall(PetscFree(adjncy)); 7409 PetscCall(PetscFree(adjncy_wgt)); 7410 PetscCall(PetscFree(vals)); 7411 if (use_vwgt) { 7412 Vec v; 7413 const PetscScalar *array; 7414 PetscInt nl; 7415 7416 PetscCall(MatCreateVecs(subdomain_adj,&v,NULL)); 7417 PetscCall(VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES)); 7418 PetscCall(VecAssemblyBegin(v)); 7419 PetscCall(VecAssemblyEnd(v)); 7420 PetscCall(VecGetLocalSize(v,&nl)); 7421 PetscCall(VecGetArrayRead(v,&array)); 7422 PetscCall(PetscMalloc1(nl,&v_wgt)); 7423 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7424 PetscCall(VecRestoreArrayRead(v,&array)); 7425 PetscCall(VecDestroy(&v)); 7426 } 7427 } else { 7428 PetscCall(MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj)); 7429 if (use_vwgt) { 7430 PetscCall(PetscMalloc1(1,&v_wgt)); 7431 v_wgt[0] = n; 7432 } 7433 } 7434 /* PetscCall(MatView(subdomain_adj,0)); */ 7435 7436 /* Partition */ 7437 PetscCall(MatPartitioningCreate(subcomm,&partitioner)); 7438 #if defined(PETSC_HAVE_PTSCOTCH) 7439 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH)); 7440 #elif defined(PETSC_HAVE_PARMETIS) 7441 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS)); 7442 #else 7443 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE)); 7444 #endif 7445 PetscCall(MatPartitioningSetAdjacency(partitioner,subdomain_adj)); 7446 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner,v_wgt)); 7447 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7448 PetscCall(MatPartitioningSetNParts(partitioner,*n_subdomains)); 7449 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7450 PetscCall(MatPartitioningApply(partitioner,&new_ranks)); 7451 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7452 7453 /* renumber new_ranks to avoid "holes" in new set of processors */ 7454 PetscCall(ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig)); 7455 PetscCall(ISDestroy(&new_ranks)); 7456 PetscCall(ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7457 if (!aggregate) { 7458 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7459 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7460 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7461 } else if (oldranks) { 7462 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7463 } else { 7464 ranks_send_to_idx[0] = is_indices[0]; 7465 } 7466 } else { 7467 PetscInt idx = 0; 7468 PetscMPIInt tag; 7469 MPI_Request *reqs; 7470 7471 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag)); 7472 PetscCall(PetscMalloc1(rend-rstart,&reqs)); 7473 for (i=rstart;i<rend;i++) { 7474 PetscCallMPI(MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart])); 7475 } 7476 PetscCallMPI(MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE)); 7477 PetscCallMPI(MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE)); 7478 PetscCall(PetscFree(reqs)); 7479 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7480 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7481 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7482 } else if (oldranks) { 7483 ranks_send_to_idx[0] = oldranks[idx]; 7484 } else { 7485 ranks_send_to_idx[0] = idx; 7486 } 7487 } 7488 PetscCall(ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7489 /* clean up */ 7490 PetscCall(PetscFree(oldranks)); 7491 PetscCall(ISDestroy(&new_ranks_contig)); 7492 PetscCall(MatDestroy(&subdomain_adj)); 7493 PetscCall(MatPartitioningDestroy(&partitioner)); 7494 } 7495 PetscCall(PetscSubcommDestroy(&psubcomm)); 7496 PetscCall(PetscFree(procs_candidates)); 7497 7498 /* assemble parallel IS for sends */ 7499 i = 1; 7500 if (!color) i=0; 7501 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends)); 7502 PetscFunctionReturn(0); 7503 } 7504 7505 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7506 7507 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7508 { 7509 Mat local_mat; 7510 IS is_sends_internal; 7511 PetscInt rows,cols,new_local_rows; 7512 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7513 PetscBool ismatis,isdense,newisdense,destroy_mat; 7514 ISLocalToGlobalMapping l2gmap; 7515 PetscInt* l2gmap_indices; 7516 const PetscInt* is_indices; 7517 MatType new_local_type; 7518 /* buffers */ 7519 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7520 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7521 PetscInt *recv_buffer_idxs_local; 7522 PetscScalar *ptr_vals,*recv_buffer_vals; 7523 const PetscScalar *send_buffer_vals; 7524 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7525 /* MPI */ 7526 MPI_Comm comm,comm_n; 7527 PetscSubcomm subcomm; 7528 PetscMPIInt n_sends,n_recvs,size; 7529 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7530 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7531 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7532 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7533 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7534 7535 PetscFunctionBegin; 7536 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7537 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7538 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7539 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7540 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7541 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7542 PetscValidLogicalCollectiveBool(mat,reuse,6); 7543 PetscValidLogicalCollectiveInt(mat,nis,8); 7544 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7545 if (nvecs) { 7546 PetscCheck(nvecs <= 1,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7547 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7548 } 7549 /* further checks */ 7550 PetscCall(MatISGetLocalMat(mat,&local_mat)); 7551 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense)); 7552 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7553 PetscCall(MatGetSize(local_mat,&rows,&cols)); 7554 PetscCheck(rows == cols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7555 if (reuse && *mat_n) { 7556 PetscInt mrows,mcols,mnrows,mncols; 7557 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7558 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis)); 7559 PetscCheck(ismatis,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7560 PetscCall(MatGetSize(mat,&mrows,&mcols)); 7561 PetscCall(MatGetSize(*mat_n,&mnrows,&mncols)); 7562 PetscCheck(mrows == mnrows,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT,mrows,mnrows); 7563 PetscCheck(mcols == mncols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT,mcols,mncols); 7564 } 7565 PetscCall(MatGetBlockSize(local_mat,&bs)); 7566 PetscValidLogicalCollectiveInt(mat,bs,1); 7567 7568 /* prepare IS for sending if not provided */ 7569 if (!is_sends) { 7570 PetscCheck(n_subdomains,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7571 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL)); 7572 } else { 7573 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7574 is_sends_internal = is_sends; 7575 } 7576 7577 /* get comm */ 7578 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 7579 7580 /* compute number of sends */ 7581 PetscCall(ISGetLocalSize(is_sends_internal,&i)); 7582 PetscCall(PetscMPIIntCast(i,&n_sends)); 7583 7584 /* compute number of receives */ 7585 PetscCallMPI(MPI_Comm_size(comm,&size)); 7586 PetscCall(PetscMalloc1(size,&iflags)); 7587 PetscCall(PetscArrayzero(iflags,size)); 7588 PetscCall(ISGetIndices(is_sends_internal,&is_indices)); 7589 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7590 PetscCall(PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs)); 7591 PetscCall(PetscFree(iflags)); 7592 7593 /* restrict comm if requested */ 7594 subcomm = NULL; 7595 destroy_mat = PETSC_FALSE; 7596 if (restrict_comm) { 7597 PetscMPIInt color,subcommsize; 7598 7599 color = 0; 7600 if (restrict_full) { 7601 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7602 } else { 7603 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7604 } 7605 PetscCall(MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm)); 7606 subcommsize = size - subcommsize; 7607 /* check if reuse has been requested */ 7608 if (reuse) { 7609 if (*mat_n) { 7610 PetscMPIInt subcommsize2; 7611 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2)); 7612 PetscCheck(subcommsize == subcommsize2,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7613 comm_n = PetscObjectComm((PetscObject)*mat_n); 7614 } else { 7615 comm_n = PETSC_COMM_SELF; 7616 } 7617 } else { /* MAT_INITIAL_MATRIX */ 7618 PetscMPIInt rank; 7619 7620 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 7621 PetscCall(PetscSubcommCreate(comm,&subcomm)); 7622 PetscCall(PetscSubcommSetNumber(subcomm,2)); 7623 PetscCall(PetscSubcommSetTypeGeneral(subcomm,color,rank)); 7624 comm_n = PetscSubcommChild(subcomm); 7625 } 7626 /* flag to destroy *mat_n if not significative */ 7627 if (color) destroy_mat = PETSC_TRUE; 7628 } else { 7629 comm_n = comm; 7630 } 7631 7632 /* prepare send/receive buffers */ 7633 PetscCall(PetscMalloc1(size,&ilengths_idxs)); 7634 PetscCall(PetscArrayzero(ilengths_idxs,size)); 7635 PetscCall(PetscMalloc1(size,&ilengths_vals)); 7636 PetscCall(PetscArrayzero(ilengths_vals,size)); 7637 if (nis) { 7638 PetscCall(PetscCalloc1(size,&ilengths_idxs_is)); 7639 } 7640 7641 /* Get data from local matrices */ 7642 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7643 /* TODO: See below some guidelines on how to prepare the local buffers */ 7644 /* 7645 send_buffer_vals should contain the raw values of the local matrix 7646 send_buffer_idxs should contain: 7647 - MatType_PRIVATE type 7648 - PetscInt size_of_l2gmap 7649 - PetscInt global_row_indices[size_of_l2gmap] 7650 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7651 */ 7652 { 7653 ISLocalToGlobalMapping mapping; 7654 7655 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7656 PetscCall(MatDenseGetArrayRead(local_mat,&send_buffer_vals)); 7657 PetscCall(ISLocalToGlobalMappingGetSize(mapping,&i)); 7658 PetscCall(PetscMalloc1(i+2,&send_buffer_idxs)); 7659 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7660 send_buffer_idxs[1] = i; 7661 PetscCall(ISLocalToGlobalMappingGetIndices(mapping,(const PetscInt**)&ptr_idxs)); 7662 PetscCall(PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i)); 7663 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping,(const PetscInt**)&ptr_idxs)); 7664 PetscCall(PetscMPIIntCast(i,&len)); 7665 for (i=0;i<n_sends;i++) { 7666 ilengths_vals[is_indices[i]] = len*len; 7667 ilengths_idxs[is_indices[i]] = len+2; 7668 } 7669 } 7670 PetscCall(PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals)); 7671 /* additional is (if any) */ 7672 if (nis) { 7673 PetscMPIInt psum; 7674 PetscInt j; 7675 for (j=0,psum=0;j<nis;j++) { 7676 PetscInt plen; 7677 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7678 PetscCall(PetscMPIIntCast(plen,&len)); 7679 psum += len+1; /* indices + length */ 7680 } 7681 PetscCall(PetscMalloc1(psum,&send_buffer_idxs_is)); 7682 for (j=0,psum=0;j<nis;j++) { 7683 PetscInt plen; 7684 const PetscInt *is_array_idxs; 7685 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7686 send_buffer_idxs_is[psum] = plen; 7687 PetscCall(ISGetIndices(isarray[j],&is_array_idxs)); 7688 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen)); 7689 PetscCall(ISRestoreIndices(isarray[j],&is_array_idxs)); 7690 psum += plen+1; /* indices + length */ 7691 } 7692 for (i=0;i<n_sends;i++) { 7693 ilengths_idxs_is[is_indices[i]] = psum; 7694 } 7695 PetscCall(PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is)); 7696 } 7697 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 7698 7699 buf_size_idxs = 0; 7700 buf_size_vals = 0; 7701 buf_size_idxs_is = 0; 7702 buf_size_vecs = 0; 7703 for (i=0;i<n_recvs;i++) { 7704 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7705 buf_size_vals += (PetscInt)olengths_vals[i]; 7706 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7707 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7708 } 7709 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs)); 7710 PetscCall(PetscMalloc1(buf_size_vals,&recv_buffer_vals)); 7711 PetscCall(PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is)); 7712 PetscCall(PetscMalloc1(buf_size_vecs,&recv_buffer_vecs)); 7713 7714 /* get new tags for clean communications */ 7715 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs)); 7716 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vals)); 7717 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is)); 7718 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vecs)); 7719 7720 /* allocate for requests */ 7721 PetscCall(PetscMalloc1(n_sends,&send_req_idxs)); 7722 PetscCall(PetscMalloc1(n_sends,&send_req_vals)); 7723 PetscCall(PetscMalloc1(n_sends,&send_req_idxs_is)); 7724 PetscCall(PetscMalloc1(n_sends,&send_req_vecs)); 7725 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs)); 7726 PetscCall(PetscMalloc1(n_recvs,&recv_req_vals)); 7727 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs_is)); 7728 PetscCall(PetscMalloc1(n_recvs,&recv_req_vecs)); 7729 7730 /* communications */ 7731 ptr_idxs = recv_buffer_idxs; 7732 ptr_vals = recv_buffer_vals; 7733 ptr_idxs_is = recv_buffer_idxs_is; 7734 ptr_vecs = recv_buffer_vecs; 7735 for (i=0;i<n_recvs;i++) { 7736 source_dest = onodes[i]; 7737 PetscCallMPI(MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i])); 7738 PetscCallMPI(MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i])); 7739 ptr_idxs += olengths_idxs[i]; 7740 ptr_vals += olengths_vals[i]; 7741 if (nis) { 7742 source_dest = onodes_is[i]; 7743 PetscCallMPI(MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i])); 7744 ptr_idxs_is += olengths_idxs_is[i]; 7745 } 7746 if (nvecs) { 7747 source_dest = onodes[i]; 7748 PetscCallMPI(MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i])); 7749 ptr_vecs += olengths_idxs[i]-2; 7750 } 7751 } 7752 for (i=0;i<n_sends;i++) { 7753 PetscCall(PetscMPIIntCast(is_indices[i],&source_dest)); 7754 PetscCallMPI(MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i])); 7755 PetscCallMPI(MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i])); 7756 if (nis) { 7757 PetscCallMPI(MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i])); 7758 } 7759 if (nvecs) { 7760 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 7761 PetscCallMPI(MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i])); 7762 } 7763 } 7764 PetscCall(ISRestoreIndices(is_sends_internal,&is_indices)); 7765 PetscCall(ISDestroy(&is_sends_internal)); 7766 7767 /* assemble new l2g map */ 7768 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE)); 7769 ptr_idxs = recv_buffer_idxs; 7770 new_local_rows = 0; 7771 for (i=0;i<n_recvs;i++) { 7772 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7773 ptr_idxs += olengths_idxs[i]; 7774 } 7775 PetscCall(PetscMalloc1(new_local_rows,&l2gmap_indices)); 7776 ptr_idxs = recv_buffer_idxs; 7777 new_local_rows = 0; 7778 for (i=0;i<n_recvs;i++) { 7779 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1))); 7780 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7781 ptr_idxs += olengths_idxs[i]; 7782 } 7783 PetscCall(PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices)); 7784 PetscCall(ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap)); 7785 PetscCall(PetscFree(l2gmap_indices)); 7786 7787 /* infer new local matrix type from received local matrices type */ 7788 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7789 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7790 if (n_recvs) { 7791 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7792 ptr_idxs = recv_buffer_idxs; 7793 for (i=0;i<n_recvs;i++) { 7794 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7795 new_local_type_private = MATAIJ_PRIVATE; 7796 break; 7797 } 7798 ptr_idxs += olengths_idxs[i]; 7799 } 7800 switch (new_local_type_private) { 7801 case MATDENSE_PRIVATE: 7802 new_local_type = MATSEQAIJ; 7803 bs = 1; 7804 break; 7805 case MATAIJ_PRIVATE: 7806 new_local_type = MATSEQAIJ; 7807 bs = 1; 7808 break; 7809 case MATBAIJ_PRIVATE: 7810 new_local_type = MATSEQBAIJ; 7811 break; 7812 case MATSBAIJ_PRIVATE: 7813 new_local_type = MATSEQSBAIJ; 7814 break; 7815 default: 7816 SETERRQ(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7817 } 7818 } else { /* by default, new_local_type is seqaij */ 7819 new_local_type = MATSEQAIJ; 7820 bs = 1; 7821 } 7822 7823 /* create MATIS object if needed */ 7824 if (!reuse) { 7825 PetscCall(MatGetSize(mat,&rows,&cols)); 7826 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7827 } else { 7828 /* it also destroys the local matrices */ 7829 if (*mat_n) { 7830 PetscCall(MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap)); 7831 } else { /* this is a fake object */ 7832 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7833 } 7834 } 7835 PetscCall(MatISGetLocalMat(*mat_n,&local_mat)); 7836 PetscCall(MatSetType(local_mat,new_local_type)); 7837 7838 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE)); 7839 7840 /* Global to local map of received indices */ 7841 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local)); /* needed for values insertion */ 7842 PetscCall(ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local)); 7843 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 7844 7845 /* restore attributes -> type of incoming data and its size */ 7846 buf_size_idxs = 0; 7847 for (i=0;i<n_recvs;i++) { 7848 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7849 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7850 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7851 } 7852 PetscCall(PetscFree(recv_buffer_idxs)); 7853 7854 /* set preallocation */ 7855 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense)); 7856 if (!newisdense) { 7857 PetscInt *new_local_nnz=NULL; 7858 7859 ptr_idxs = recv_buffer_idxs_local; 7860 if (n_recvs) { 7861 PetscCall(PetscCalloc1(new_local_rows,&new_local_nnz)); 7862 } 7863 for (i=0;i<n_recvs;i++) { 7864 PetscInt j; 7865 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7866 for (j=0;j<*(ptr_idxs+1);j++) { 7867 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7868 } 7869 } else { 7870 /* TODO */ 7871 } 7872 ptr_idxs += olengths_idxs[i]; 7873 } 7874 if (new_local_nnz) { 7875 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7876 PetscCall(MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz)); 7877 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7878 PetscCall(MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7879 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7880 PetscCall(MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7881 } else { 7882 PetscCall(MatSetUp(local_mat)); 7883 } 7884 PetscCall(PetscFree(new_local_nnz)); 7885 } else { 7886 PetscCall(MatSetUp(local_mat)); 7887 } 7888 7889 /* set values */ 7890 ptr_vals = recv_buffer_vals; 7891 ptr_idxs = recv_buffer_idxs_local; 7892 for (i=0;i<n_recvs;i++) { 7893 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7894 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE)); 7895 PetscCall(MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES)); 7896 PetscCall(MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY)); 7897 PetscCall(MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY)); 7898 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE)); 7899 } else { 7900 /* TODO */ 7901 } 7902 ptr_idxs += olengths_idxs[i]; 7903 ptr_vals += olengths_vals[i]; 7904 } 7905 PetscCall(MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY)); 7906 PetscCall(MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY)); 7907 PetscCall(MatISRestoreLocalMat(*mat_n,&local_mat)); 7908 PetscCall(MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY)); 7909 PetscCall(MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY)); 7910 PetscCall(PetscFree(recv_buffer_vals)); 7911 7912 #if 0 7913 if (!restrict_comm) { /* check */ 7914 Vec lvec,rvec; 7915 PetscReal infty_error; 7916 7917 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 7918 PetscCall(VecSetRandom(rvec,NULL)); 7919 PetscCall(MatMult(mat,rvec,lvec)); 7920 PetscCall(VecScale(lvec,-1.0)); 7921 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7922 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7923 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7924 PetscCall(VecDestroy(&rvec)); 7925 PetscCall(VecDestroy(&lvec)); 7926 } 7927 #endif 7928 7929 /* assemble new additional is (if any) */ 7930 if (nis) { 7931 PetscInt **temp_idxs,*count_is,j,psum; 7932 7933 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE)); 7934 PetscCall(PetscCalloc1(nis,&count_is)); 7935 ptr_idxs = recv_buffer_idxs_is; 7936 psum = 0; 7937 for (i=0;i<n_recvs;i++) { 7938 for (j=0;j<nis;j++) { 7939 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7940 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7941 psum += plen; 7942 ptr_idxs += plen+1; /* shift pointer to received data */ 7943 } 7944 } 7945 PetscCall(PetscMalloc1(nis,&temp_idxs)); 7946 PetscCall(PetscMalloc1(psum,&temp_idxs[0])); 7947 for (i=1;i<nis;i++) { 7948 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7949 } 7950 PetscCall(PetscArrayzero(count_is,nis)); 7951 ptr_idxs = recv_buffer_idxs_is; 7952 for (i=0;i<n_recvs;i++) { 7953 for (j=0;j<nis;j++) { 7954 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7955 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen)); 7956 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7957 ptr_idxs += plen+1; /* shift pointer to received data */ 7958 } 7959 } 7960 for (i=0;i<nis;i++) { 7961 PetscCall(ISDestroy(&isarray[i])); 7962 PetscCall(PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i])); 7963 PetscCall(ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i])); 7964 } 7965 PetscCall(PetscFree(count_is)); 7966 PetscCall(PetscFree(temp_idxs[0])); 7967 PetscCall(PetscFree(temp_idxs)); 7968 } 7969 /* free workspace */ 7970 PetscCall(PetscFree(recv_buffer_idxs_is)); 7971 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE)); 7972 PetscCall(PetscFree(send_buffer_idxs)); 7973 PetscCallMPI(MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE)); 7974 if (isdense) { 7975 PetscCall(MatISGetLocalMat(mat,&local_mat)); 7976 PetscCall(MatDenseRestoreArrayRead(local_mat,&send_buffer_vals)); 7977 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 7978 } else { 7979 /* PetscCall(PetscFree(send_buffer_vals)); */ 7980 } 7981 if (nis) { 7982 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE)); 7983 PetscCall(PetscFree(send_buffer_idxs_is)); 7984 } 7985 7986 if (nvecs) { 7987 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE)); 7988 PetscCallMPI(MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE)); 7989 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 7990 PetscCall(VecDestroy(&nnsp_vec[0])); 7991 PetscCall(VecCreate(comm_n,&nnsp_vec[0])); 7992 PetscCall(VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE)); 7993 PetscCall(VecSetType(nnsp_vec[0],VECSTANDARD)); 7994 /* set values */ 7995 ptr_vals = recv_buffer_vecs; 7996 ptr_idxs = recv_buffer_idxs_local; 7997 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 7998 for (i=0;i<n_recvs;i++) { 7999 PetscInt j; 8000 for (j=0;j<*(ptr_idxs+1);j++) { 8001 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8002 } 8003 ptr_idxs += olengths_idxs[i]; 8004 ptr_vals += olengths_idxs[i]-2; 8005 } 8006 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 8007 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8008 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8009 } 8010 8011 PetscCall(PetscFree(recv_buffer_vecs)); 8012 PetscCall(PetscFree(recv_buffer_idxs_local)); 8013 PetscCall(PetscFree(recv_req_idxs)); 8014 PetscCall(PetscFree(recv_req_vals)); 8015 PetscCall(PetscFree(recv_req_vecs)); 8016 PetscCall(PetscFree(recv_req_idxs_is)); 8017 PetscCall(PetscFree(send_req_idxs)); 8018 PetscCall(PetscFree(send_req_vals)); 8019 PetscCall(PetscFree(send_req_vecs)); 8020 PetscCall(PetscFree(send_req_idxs_is)); 8021 PetscCall(PetscFree(ilengths_vals)); 8022 PetscCall(PetscFree(ilengths_idxs)); 8023 PetscCall(PetscFree(olengths_vals)); 8024 PetscCall(PetscFree(olengths_idxs)); 8025 PetscCall(PetscFree(onodes)); 8026 if (nis) { 8027 PetscCall(PetscFree(ilengths_idxs_is)); 8028 PetscCall(PetscFree(olengths_idxs_is)); 8029 PetscCall(PetscFree(onodes_is)); 8030 } 8031 PetscCall(PetscSubcommDestroy(&subcomm)); 8032 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8033 PetscCall(MatDestroy(mat_n)); 8034 for (i=0;i<nis;i++) { 8035 PetscCall(ISDestroy(&isarray[i])); 8036 } 8037 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8038 PetscCall(VecDestroy(&nnsp_vec[0])); 8039 } 8040 *mat_n = NULL; 8041 } 8042 PetscFunctionReturn(0); 8043 } 8044 8045 /* temporary hack into ksp private data structure */ 8046 #include <petsc/private/kspimpl.h> 8047 8048 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8049 { 8050 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8051 PC_IS *pcis = (PC_IS*)pc->data; 8052 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8053 Mat coarsedivudotp = NULL; 8054 Mat coarseG,t_coarse_mat_is; 8055 MatNullSpace CoarseNullSpace = NULL; 8056 ISLocalToGlobalMapping coarse_islg; 8057 IS coarse_is,*isarray,corners; 8058 PetscInt i,im_active=-1,active_procs=-1; 8059 PetscInt nis,nisdofs,nisneu,nisvert; 8060 PetscInt coarse_eqs_per_proc; 8061 PC pc_temp; 8062 PCType coarse_pc_type; 8063 KSPType coarse_ksp_type; 8064 PetscBool multilevel_requested,multilevel_allowed; 8065 PetscBool coarse_reuse; 8066 PetscInt ncoarse,nedcfield; 8067 PetscBool compute_vecs = PETSC_FALSE; 8068 PetscScalar *array; 8069 MatReuse coarse_mat_reuse; 8070 PetscBool restr, full_restr, have_void; 8071 PetscMPIInt size; 8072 8073 PetscFunctionBegin; 8074 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8075 /* Assign global numbering to coarse dofs */ 8076 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8077 PetscInt ocoarse_size; 8078 compute_vecs = PETSC_TRUE; 8079 8080 pcbddc->new_primal_space = PETSC_TRUE; 8081 ocoarse_size = pcbddc->coarse_size; 8082 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8083 PetscCall(PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices)); 8084 /* see if we can avoid some work */ 8085 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8086 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8087 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8088 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8089 coarse_reuse = PETSC_FALSE; 8090 } else { /* we can safely reuse already computed coarse matrix */ 8091 coarse_reuse = PETSC_TRUE; 8092 } 8093 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8094 coarse_reuse = PETSC_FALSE; 8095 } 8096 /* reset any subassembling information */ 8097 if (!coarse_reuse || pcbddc->recompute_topography) { 8098 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8099 } 8100 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8101 coarse_reuse = PETSC_TRUE; 8102 } 8103 if (coarse_reuse && pcbddc->coarse_ksp) { 8104 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 8105 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8106 coarse_mat_reuse = MAT_REUSE_MATRIX; 8107 } else { 8108 coarse_mat = NULL; 8109 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8110 } 8111 8112 /* creates temporary l2gmap and IS for coarse indexes */ 8113 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is)); 8114 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg)); 8115 8116 /* creates temporary MATIS object for coarse matrix */ 8117 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense)); 8118 PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,coarse_islg,&t_coarse_mat_is)); 8119 PetscCall(MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense)); 8120 PetscCall(MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8121 PetscCall(MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8122 PetscCall(MatDestroy(&coarse_submat_dense)); 8123 8124 /* count "active" (i.e. with positive local size) and "void" processes */ 8125 im_active = !!(pcis->n); 8126 PetscCall(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8127 8128 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8129 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8130 /* full_restr : just use the receivers from the subassembling pattern */ 8131 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size)); 8132 coarse_mat_is = NULL; 8133 multilevel_allowed = PETSC_FALSE; 8134 multilevel_requested = PETSC_FALSE; 8135 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8136 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8137 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8138 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8139 if (multilevel_requested) { 8140 ncoarse = active_procs/pcbddc->coarsening_ratio; 8141 restr = PETSC_FALSE; 8142 full_restr = PETSC_FALSE; 8143 } else { 8144 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8145 restr = PETSC_TRUE; 8146 full_restr = PETSC_TRUE; 8147 } 8148 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8149 ncoarse = PetscMax(1,ncoarse); 8150 if (!pcbddc->coarse_subassembling) { 8151 if (pcbddc->coarsening_ratio > 1) { 8152 if (multilevel_requested) { 8153 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8154 } else { 8155 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8156 } 8157 } else { 8158 PetscMPIInt rank; 8159 8160 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank)); 8161 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8162 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling)); 8163 } 8164 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8165 PetscInt psum; 8166 if (pcbddc->coarse_ksp) psum = 1; 8167 else psum = 0; 8168 PetscCall(MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8169 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8170 } 8171 /* determine if we can go multilevel */ 8172 if (multilevel_requested) { 8173 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8174 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8175 } 8176 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8177 8178 /* dump subassembling pattern */ 8179 if (pcbddc->dbg_flag && multilevel_allowed) { 8180 PetscCall(ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer)); 8181 } 8182 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8183 nedcfield = -1; 8184 corners = NULL; 8185 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8186 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8187 const PetscInt *idxs; 8188 ISLocalToGlobalMapping tmap; 8189 8190 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8191 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap)); 8192 /* allocate space for temporary storage */ 8193 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs)); 8194 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs2)); 8195 /* allocate for IS array */ 8196 nisdofs = pcbddc->n_ISForDofsLocal; 8197 if (pcbddc->nedclocal) { 8198 if (pcbddc->nedfield > -1) { 8199 nedcfield = pcbddc->nedfield; 8200 } else { 8201 nedcfield = 0; 8202 PetscCheck(!nisdofs,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%" PetscInt_FMT ")",nisdofs); 8203 nisdofs = 1; 8204 } 8205 } 8206 nisneu = !!pcbddc->NeumannBoundariesLocal; 8207 nisvert = 0; /* nisvert is not used */ 8208 nis = nisdofs + nisneu + nisvert; 8209 PetscCall(PetscMalloc1(nis,&isarray)); 8210 /* dofs splitting */ 8211 for (i=0;i<nisdofs;i++) { 8212 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8213 if (nedcfield != i) { 8214 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize)); 8215 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8216 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8217 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8218 } else { 8219 PetscCall(ISGetLocalSize(pcbddc->nedclocal,&tsize)); 8220 PetscCall(ISGetIndices(pcbddc->nedclocal,&idxs)); 8221 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8222 PetscCheck(tsize == nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT,tsize,nout); 8223 PetscCall(ISRestoreIndices(pcbddc->nedclocal,&idxs)); 8224 } 8225 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8226 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i])); 8227 /* PetscCall(ISView(isarray[i],0)); */ 8228 } 8229 /* neumann boundaries */ 8230 if (pcbddc->NeumannBoundariesLocal) { 8231 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8232 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize)); 8233 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8234 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8235 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8236 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8237 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs])); 8238 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8239 } 8240 /* coordinates */ 8241 if (pcbddc->corner_selected) { 8242 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8243 PetscCall(ISGetLocalSize(corners,&tsize)); 8244 PetscCall(ISGetIndices(corners,&idxs)); 8245 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8246 PetscCheck(tsize == nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT,tsize,nout); 8247 PetscCall(ISRestoreIndices(corners,&idxs)); 8248 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8249 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8250 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners)); 8251 } 8252 PetscCall(PetscFree(tidxs)); 8253 PetscCall(PetscFree(tidxs2)); 8254 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8255 } else { 8256 nis = 0; 8257 nisdofs = 0; 8258 nisneu = 0; 8259 nisvert = 0; 8260 isarray = NULL; 8261 } 8262 /* destroy no longer needed map */ 8263 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8264 8265 /* subassemble */ 8266 if (multilevel_allowed) { 8267 Vec vp[1]; 8268 PetscInt nvecs = 0; 8269 PetscBool reuse,reuser; 8270 8271 if (coarse_mat) reuse = PETSC_TRUE; 8272 else reuse = PETSC_FALSE; 8273 PetscCall(MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8274 vp[0] = NULL; 8275 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8276 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&vp[0])); 8277 PetscCall(VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE)); 8278 PetscCall(VecSetType(vp[0],VECSTANDARD)); 8279 nvecs = 1; 8280 8281 if (pcbddc->divudotp) { 8282 Mat B,loc_divudotp; 8283 Vec v,p; 8284 IS dummy; 8285 PetscInt np; 8286 8287 PetscCall(MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp)); 8288 PetscCall(MatGetSize(loc_divudotp,&np,NULL)); 8289 PetscCall(ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy)); 8290 PetscCall(MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B)); 8291 PetscCall(MatCreateVecs(B,&v,&p)); 8292 PetscCall(VecSet(p,1.)); 8293 PetscCall(MatMultTranspose(B,p,v)); 8294 PetscCall(VecDestroy(&p)); 8295 PetscCall(MatDestroy(&B)); 8296 PetscCall(VecGetArray(vp[0],&array)); 8297 PetscCall(VecPlaceArray(pcbddc->vec1_P,array)); 8298 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P)); 8299 PetscCall(VecResetArray(pcbddc->vec1_P)); 8300 PetscCall(VecRestoreArray(vp[0],&array)); 8301 PetscCall(ISDestroy(&dummy)); 8302 PetscCall(VecDestroy(&v)); 8303 } 8304 } 8305 if (reuser) { 8306 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp)); 8307 } else { 8308 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp)); 8309 } 8310 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8311 PetscScalar *arraym; 8312 const PetscScalar *arrayv; 8313 PetscInt nl; 8314 PetscCall(VecGetLocalSize(vp[0],&nl)); 8315 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp)); 8316 PetscCall(MatDenseGetArray(coarsedivudotp,&arraym)); 8317 PetscCall(VecGetArrayRead(vp[0],&arrayv)); 8318 PetscCall(PetscArraycpy(arraym,arrayv,nl)); 8319 PetscCall(VecRestoreArrayRead(vp[0],&arrayv)); 8320 PetscCall(MatDenseRestoreArray(coarsedivudotp,&arraym)); 8321 PetscCall(VecDestroy(&vp[0])); 8322 } else { 8323 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp)); 8324 } 8325 } else { 8326 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL)); 8327 } 8328 if (coarse_mat_is || coarse_mat) { 8329 if (!multilevel_allowed) { 8330 PetscCall(MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat)); 8331 } else { 8332 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8333 if (coarse_mat_is) { 8334 PetscCheck(!coarse_mat,PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8335 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8336 coarse_mat = coarse_mat_is; 8337 } 8338 } 8339 } 8340 PetscCall(MatDestroy(&t_coarse_mat_is)); 8341 PetscCall(MatDestroy(&coarse_mat_is)); 8342 8343 /* create local to global scatters for coarse problem */ 8344 if (compute_vecs) { 8345 PetscInt lrows; 8346 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8347 if (coarse_mat) { 8348 PetscCall(MatGetLocalSize(coarse_mat,&lrows,NULL)); 8349 } else { 8350 lrows = 0; 8351 } 8352 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec)); 8353 PetscCall(VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE)); 8354 PetscCall(VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8355 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8356 PetscCall(VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob)); 8357 } 8358 PetscCall(ISDestroy(&coarse_is)); 8359 8360 /* set defaults for coarse KSP and PC */ 8361 if (multilevel_allowed) { 8362 coarse_ksp_type = KSPRICHARDSON; 8363 coarse_pc_type = PCBDDC; 8364 } else { 8365 coarse_ksp_type = KSPPREONLY; 8366 coarse_pc_type = PCREDUNDANT; 8367 } 8368 8369 /* print some info if requested */ 8370 if (pcbddc->dbg_flag) { 8371 if (!multilevel_allowed) { 8372 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8373 if (multilevel_requested) { 8374 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio)); 8375 } else if (pcbddc->max_levels) { 8376 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%" PetscInt_FMT ")\n",pcbddc->max_levels)); 8377 } 8378 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8379 } 8380 } 8381 8382 /* communicate coarse discrete gradient */ 8383 coarseG = NULL; 8384 if (pcbddc->nedcG && multilevel_allowed) { 8385 MPI_Comm ccomm; 8386 if (coarse_mat) { 8387 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8388 } else { 8389 ccomm = MPI_COMM_NULL; 8390 } 8391 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG)); 8392 } 8393 8394 /* create the coarse KSP object only once with defaults */ 8395 if (coarse_mat) { 8396 PetscBool isredundant,isbddc,force,valid; 8397 PetscViewer dbg_viewer = NULL; 8398 PetscBool isset,issym,isher,isspd; 8399 8400 if (pcbddc->dbg_flag) { 8401 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8402 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level)); 8403 } 8404 if (!pcbddc->coarse_ksp) { 8405 char prefix[256],str_level[16]; 8406 size_t len; 8407 8408 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp)); 8409 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure)); 8410 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1)); 8411 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1)); 8412 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8413 PetscCall(KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type)); 8414 PetscCall(KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE)); 8415 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8416 /* TODO is this logic correct? should check for coarse_mat type */ 8417 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8418 /* prefix */ 8419 PetscCall(PetscStrcpy(prefix,"")); 8420 PetscCall(PetscStrcpy(str_level,"")); 8421 if (!pcbddc->current_level) { 8422 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix))); 8423 PetscCall(PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix))); 8424 } else { 8425 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 8426 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8427 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8428 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8429 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1)); 8430 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 8431 PetscCall(PetscStrlcat(prefix,str_level,sizeof(prefix))); 8432 } 8433 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix)); 8434 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8435 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8436 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8437 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8438 /* allow user customization */ 8439 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8440 /* get some info after set from options */ 8441 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8442 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8443 force = PETSC_FALSE; 8444 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8445 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8446 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8447 if (multilevel_allowed && !force && !valid) { 8448 isbddc = PETSC_TRUE; 8449 PetscCall(PCSetType(pc_temp,PCBDDC)); 8450 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8451 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8452 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8453 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8454 PetscObjectOptionsBegin((PetscObject)pc_temp); 8455 PetscCall((*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp)); 8456 PetscCall(PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp)); 8457 PetscOptionsEnd(); 8458 pc_temp->setfromoptionscalled++; 8459 } 8460 } 8461 } 8462 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8463 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8464 if (nisdofs) { 8465 PetscCall(PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray)); 8466 for (i=0;i<nisdofs;i++) { 8467 PetscCall(ISDestroy(&isarray[i])); 8468 } 8469 } 8470 if (nisneu) { 8471 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs])); 8472 PetscCall(ISDestroy(&isarray[nisdofs])); 8473 } 8474 if (nisvert) { 8475 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1])); 8476 PetscCall(ISDestroy(&isarray[nis-1])); 8477 } 8478 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE)); 8479 8480 /* get some info after set from options */ 8481 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8482 8483 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8484 if (isbddc && !multilevel_allowed) { 8485 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8486 } 8487 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8488 force = PETSC_FALSE; 8489 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8490 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8491 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8492 PetscCall(PCSetType(pc_temp,PCBDDC)); 8493 } 8494 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant)); 8495 if (isredundant) { 8496 KSP inner_ksp; 8497 PC inner_pc; 8498 8499 PetscCall(PCRedundantGetKSP(pc_temp,&inner_ksp)); 8500 PetscCall(KSPGetPC(inner_ksp,&inner_pc)); 8501 } 8502 8503 /* parameters which miss an API */ 8504 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8505 if (isbddc) { 8506 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8507 8508 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8509 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8510 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8511 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8512 if (pcbddc_coarse->benign_saddle_point) { 8513 Mat coarsedivudotp_is; 8514 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8515 IS row,col; 8516 const PetscInt *gidxs; 8517 PetscInt n,st,M,N; 8518 8519 PetscCall(MatGetSize(coarsedivudotp,&n,NULL)); 8520 PetscCallMPI(MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat))); 8521 st = st-n; 8522 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row)); 8523 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL)); 8524 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap,&n)); 8525 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 8526 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col)); 8527 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 8528 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 8529 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 8530 PetscCall(ISGetSize(row,&M)); 8531 PetscCall(MatGetSize(coarse_mat,&N,NULL)); 8532 PetscCall(ISDestroy(&row)); 8533 PetscCall(ISDestroy(&col)); 8534 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is)); 8535 PetscCall(MatSetType(coarsedivudotp_is,MATIS)); 8536 PetscCall(MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N)); 8537 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g)); 8538 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8539 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8540 PetscCall(MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp)); 8541 PetscCall(MatDestroy(&coarsedivudotp)); 8542 PetscCall(PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL)); 8543 PetscCall(MatDestroy(&coarsedivudotp_is)); 8544 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8545 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8546 } 8547 } 8548 8549 /* propagate symmetry info of coarse matrix */ 8550 PetscCall(MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE)); 8551 PetscCall(MatIsSymmetricKnown(pc->pmat,&isset,&issym)); 8552 if (isset) PetscCall(MatSetOption(coarse_mat,MAT_SYMMETRIC,issym)); 8553 PetscCall(MatIsHermitianKnown(pc->pmat,&isset,&isher)); 8554 if (isset) PetscCall(MatSetOption(coarse_mat,MAT_HERMITIAN,isher)); 8555 PetscCall(MatIsSPDKnown(pc->pmat,&isset,&isspd)); 8556 if (isset) PetscCall(MatSetOption(coarse_mat,MAT_SPD,isspd)); 8557 8558 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8559 PetscCall(MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE)); 8560 } 8561 /* set operators */ 8562 PetscCall(MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view")); 8563 PetscCall(MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix)); 8564 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8565 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level)); 8566 } 8567 PetscCall(MatDestroy(&coarseG)); 8568 PetscCall(PetscFree(isarray)); 8569 #if 0 8570 { 8571 PetscViewer viewer; 8572 char filename[256]; 8573 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8574 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8575 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8576 PetscCall(MatView(coarse_mat,viewer)); 8577 PetscCall(PetscViewerPopFormat(viewer)); 8578 PetscCall(PetscViewerDestroy(&viewer)); 8579 } 8580 #endif 8581 8582 if (corners) { 8583 Vec gv; 8584 IS is; 8585 const PetscInt *idxs; 8586 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8587 PetscScalar *coords; 8588 8589 PetscCheck(pcbddc->mat_graph->cloc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8590 PetscCall(VecGetSize(pcbddc->coarse_vec,&N)); 8591 PetscCall(VecGetLocalSize(pcbddc->coarse_vec,&n)); 8592 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv)); 8593 PetscCall(VecSetBlockSize(gv,cdim)); 8594 PetscCall(VecSetSizes(gv,n*cdim,N*cdim)); 8595 PetscCall(VecSetType(gv,VECSTANDARD)); 8596 PetscCall(VecSetFromOptions(gv)); 8597 PetscCall(VecSet(gv,PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8598 8599 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8600 PetscCall(ISGetLocalSize(is,&n)); 8601 PetscCall(ISGetIndices(is,&idxs)); 8602 PetscCall(PetscMalloc1(n*cdim,&coords)); 8603 for (i=0;i<n;i++) { 8604 for (d=0;d<cdim;d++) { 8605 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8606 } 8607 } 8608 PetscCall(ISRestoreIndices(is,&idxs)); 8609 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8610 8611 PetscCall(ISGetLocalSize(corners,&n)); 8612 PetscCall(ISGetIndices(corners,&idxs)); 8613 PetscCall(VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES)); 8614 PetscCall(ISRestoreIndices(corners,&idxs)); 8615 PetscCall(PetscFree(coords)); 8616 PetscCall(VecAssemblyBegin(gv)); 8617 PetscCall(VecAssemblyEnd(gv)); 8618 PetscCall(VecGetArray(gv,&coords)); 8619 if (pcbddc->coarse_ksp) { 8620 PC coarse_pc; 8621 PetscBool isbddc; 8622 8623 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 8624 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 8625 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8626 PetscReal *realcoords; 8627 8628 PetscCall(VecGetLocalSize(gv,&n)); 8629 #if defined(PETSC_USE_COMPLEX) 8630 PetscCall(PetscMalloc1(n,&realcoords)); 8631 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8632 #else 8633 realcoords = coords; 8634 #endif 8635 PetscCall(PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords)); 8636 #if defined(PETSC_USE_COMPLEX) 8637 PetscCall(PetscFree(realcoords)); 8638 #endif 8639 } 8640 } 8641 PetscCall(VecRestoreArray(gv,&coords)); 8642 PetscCall(VecDestroy(&gv)); 8643 } 8644 PetscCall(ISDestroy(&corners)); 8645 8646 if (pcbddc->coarse_ksp) { 8647 Vec crhs,csol; 8648 8649 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&csol)); 8650 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&crhs)); 8651 if (!csol) { 8652 PetscCall(MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL)); 8653 } 8654 if (!crhs) { 8655 PetscCall(MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs))); 8656 } 8657 } 8658 PetscCall(MatDestroy(&coarsedivudotp)); 8659 8660 /* compute null space for coarse solver if the benign trick has been requested */ 8661 if (pcbddc->benign_null) { 8662 8663 PetscCall(VecSet(pcbddc->vec1_P,0.)); 8664 for (i=0;i<pcbddc->benign_n;i++) { 8665 PetscCall(VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES)); 8666 } 8667 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8668 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8669 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8670 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8671 if (coarse_mat) { 8672 Vec nullv; 8673 PetscScalar *array,*array2; 8674 PetscInt nl; 8675 8676 PetscCall(MatCreateVecs(coarse_mat,&nullv,NULL)); 8677 PetscCall(VecGetLocalSize(nullv,&nl)); 8678 PetscCall(VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8679 PetscCall(VecGetArray(nullv,&array2)); 8680 PetscCall(PetscArraycpy(array2,array,nl)); 8681 PetscCall(VecRestoreArray(nullv,&array2)); 8682 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8683 PetscCall(VecNormalize(nullv,NULL)); 8684 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace)); 8685 PetscCall(VecDestroy(&nullv)); 8686 } 8687 } 8688 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8689 8690 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8691 if (pcbddc->coarse_ksp) { 8692 PetscBool ispreonly; 8693 8694 if (CoarseNullSpace) { 8695 PetscBool isnull; 8696 8697 PetscCall(MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull)); 8698 if (isnull) PetscCall(MatSetNullSpace(coarse_mat,CoarseNullSpace)); 8699 /* TODO: add local nullspaces (if any) */ 8700 } 8701 /* setup coarse ksp */ 8702 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8703 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8704 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly)); 8705 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8706 KSP check_ksp; 8707 KSPType check_ksp_type; 8708 PC check_pc; 8709 Vec check_vec,coarse_vec; 8710 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8711 PetscInt its; 8712 PetscBool compute_eigs; 8713 PetscReal *eigs_r,*eigs_c; 8714 PetscInt neigs; 8715 const char *prefix; 8716 8717 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8718 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp)); 8719 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0)); 8720 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE)); 8721 PetscCall(KSPSetOperators(check_ksp,coarse_mat,coarse_mat)); 8722 PetscCall(KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size)); 8723 /* prevent from setup unneeded object */ 8724 PetscCall(KSPGetPC(check_ksp,&check_pc)); 8725 PetscCall(PCSetType(check_pc,PCNONE)); 8726 if (ispreonly) { 8727 check_ksp_type = KSPPREONLY; 8728 compute_eigs = PETSC_FALSE; 8729 } else { 8730 check_ksp_type = KSPGMRES; 8731 compute_eigs = PETSC_TRUE; 8732 } 8733 PetscCall(KSPSetType(check_ksp,check_ksp_type)); 8734 PetscCall(KSPSetComputeSingularValues(check_ksp,compute_eigs)); 8735 PetscCall(KSPSetComputeEigenvalues(check_ksp,compute_eigs)); 8736 PetscCall(KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1)); 8737 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix)); 8738 PetscCall(KSPSetOptionsPrefix(check_ksp,prefix)); 8739 PetscCall(KSPAppendOptionsPrefix(check_ksp,"check_")); 8740 PetscCall(KSPSetFromOptions(check_ksp)); 8741 PetscCall(KSPSetUp(check_ksp)); 8742 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&check_pc)); 8743 PetscCall(KSPSetPC(check_ksp,check_pc)); 8744 /* create random vec */ 8745 PetscCall(MatCreateVecs(coarse_mat,&coarse_vec,&check_vec)); 8746 PetscCall(VecSetRandom(check_vec,NULL)); 8747 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8748 /* solve coarse problem */ 8749 PetscCall(KSPSolve(check_ksp,coarse_vec,coarse_vec)); 8750 PetscCall(KSPCheckSolve(check_ksp,pc,coarse_vec)); 8751 /* set eigenvalue estimation if preonly has not been requested */ 8752 if (compute_eigs) { 8753 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_r)); 8754 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_c)); 8755 PetscCall(KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs)); 8756 if (neigs) { 8757 lambda_max = eigs_r[neigs-1]; 8758 lambda_min = eigs_r[0]; 8759 if (pcbddc->use_coarse_estimates) { 8760 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8761 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min)); 8762 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min))); 8763 } 8764 } 8765 } 8766 } 8767 8768 /* check coarse problem residual error */ 8769 if (pcbddc->dbg_flag) { 8770 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8771 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1))); 8772 PetscCall(VecAXPY(check_vec,-1.0,coarse_vec)); 8773 PetscCall(VecNorm(check_vec,NORM_INFINITY,&infty_error)); 8774 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8775 PetscCall(VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error)); 8776 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates)); 8777 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer)); 8778 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer)); 8779 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",(double)infty_error)); 8780 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",(double)abs_infty_error)); 8781 if (CoarseNullSpace) { 8782 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n")); 8783 } 8784 if (compute_eigs) { 8785 PetscReal lambda_max_s,lambda_min_s; 8786 KSPConvergedReason reason; 8787 PetscCall(KSPGetType(check_ksp,&check_ksp_type)); 8788 PetscCall(KSPGetIterationNumber(check_ksp,&its)); 8789 PetscCall(KSPGetConvergedReason(check_ksp,&reason)); 8790 PetscCall(KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s)); 8791 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,(double)lambda_min,(double)lambda_max,(double)lambda_min_s,(double)lambda_max_s)); 8792 for (i=0;i<neigs;i++) { 8793 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",(double)eigs_r[i],(double)eigs_c[i])); 8794 } 8795 } 8796 PetscCall(PetscViewerFlush(dbg_viewer)); 8797 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1))); 8798 } 8799 PetscCall(VecDestroy(&check_vec)); 8800 PetscCall(VecDestroy(&coarse_vec)); 8801 PetscCall(KSPDestroy(&check_ksp)); 8802 if (compute_eigs) { 8803 PetscCall(PetscFree(eigs_r)); 8804 PetscCall(PetscFree(eigs_c)); 8805 } 8806 } 8807 } 8808 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 8809 /* print additional info */ 8810 if (pcbddc->dbg_flag) { 8811 /* waits until all processes reaches this point */ 8812 PetscCall(PetscBarrier((PetscObject)pc)); 8813 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %" PetscInt_FMT "\n",pcbddc->current_level)); 8814 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8815 } 8816 8817 /* free memory */ 8818 PetscCall(MatDestroy(&coarse_mat)); 8819 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8820 PetscFunctionReturn(0); 8821 } 8822 8823 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8824 { 8825 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8826 PC_IS* pcis = (PC_IS*)pc->data; 8827 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8828 IS subset,subset_mult,subset_n; 8829 PetscInt local_size,coarse_size=0; 8830 PetscInt *local_primal_indices=NULL; 8831 const PetscInt *t_local_primal_indices; 8832 8833 PetscFunctionBegin; 8834 /* Compute global number of coarse dofs */ 8835 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8836 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n)); 8837 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset)); 8838 PetscCall(ISDestroy(&subset_n)); 8839 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult)); 8840 PetscCall(ISRenumber(subset,subset_mult,&coarse_size,&subset_n)); 8841 PetscCall(ISDestroy(&subset)); 8842 PetscCall(ISDestroy(&subset_mult)); 8843 PetscCall(ISGetLocalSize(subset_n,&local_size)); 8844 PetscCheck(local_size == pcbddc->local_primal_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT,local_size,pcbddc->local_primal_size); 8845 PetscCall(PetscMalloc1(local_size,&local_primal_indices)); 8846 PetscCall(ISGetIndices(subset_n,&t_local_primal_indices)); 8847 PetscCall(PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size)); 8848 PetscCall(ISRestoreIndices(subset_n,&t_local_primal_indices)); 8849 PetscCall(ISDestroy(&subset_n)); 8850 8851 /* check numbering */ 8852 if (pcbddc->dbg_flag) { 8853 PetscScalar coarsesum,*array,*array2; 8854 PetscInt i; 8855 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8856 8857 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8858 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8859 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n")); 8860 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8861 /* counter */ 8862 PetscCall(VecSet(pcis->vec1_global,0.0)); 8863 PetscCall(VecSet(pcis->vec1_N,1.0)); 8864 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8865 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8866 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8867 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8868 PetscCall(VecSet(pcis->vec1_N,0.0)); 8869 for (i=0;i<pcbddc->local_primal_size;i++) { 8870 PetscCall(VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES)); 8871 } 8872 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8873 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8874 PetscCall(VecSet(pcis->vec1_global,0.0)); 8875 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8876 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8877 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8878 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8879 PetscCall(VecGetArray(pcis->vec1_N,&array)); 8880 PetscCall(VecGetArray(pcis->vec2_N,&array2)); 8881 for (i=0;i<pcis->n;i++) { 8882 if (array[i] != 0.0 && array[i] != array2[i]) { 8883 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8884 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8885 set_error = PETSC_TRUE; 8886 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi)); 8887 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n",PetscGlobalRank,i,gi,owned,neigh)); 8888 } 8889 } 8890 PetscCall(VecRestoreArray(pcis->vec2_N,&array2)); 8891 PetscCall(MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8892 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8893 for (i=0;i<pcis->n;i++) { 8894 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8895 } 8896 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 8897 PetscCall(VecSet(pcis->vec1_global,0.0)); 8898 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8899 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8900 PetscCall(VecSum(pcis->vec1_global,&coarsesum)); 8901 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %" PetscInt_FMT " (%lf)\n",coarse_size,(double)PetscRealPart(coarsesum))); 8902 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8903 PetscInt *gidxs; 8904 8905 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&gidxs)); 8906 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs)); 8907 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n")); 8908 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8909 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank)); 8910 for (i=0;i<pcbddc->local_primal_size;i++) { 8911 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i])); 8912 } 8913 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8914 PetscCall(PetscFree(gidxs)); 8915 } 8916 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8917 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8918 PetscCheck(!set_error_reduced,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8919 } 8920 8921 /* get back data */ 8922 *coarse_size_n = coarse_size; 8923 *local_primal_indices_n = local_primal_indices; 8924 PetscFunctionReturn(0); 8925 } 8926 8927 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8928 { 8929 IS localis_t; 8930 PetscInt i,lsize,*idxs,n; 8931 PetscScalar *vals; 8932 8933 PetscFunctionBegin; 8934 /* get indices in local ordering exploiting local to global map */ 8935 PetscCall(ISGetLocalSize(globalis,&lsize)); 8936 PetscCall(PetscMalloc1(lsize,&vals)); 8937 for (i=0;i<lsize;i++) vals[i] = 1.0; 8938 PetscCall(ISGetIndices(globalis,(const PetscInt**)&idxs)); 8939 PetscCall(VecSet(gwork,0.0)); 8940 PetscCall(VecSet(lwork,0.0)); 8941 if (idxs) { /* multilevel guard */ 8942 PetscCall(VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE)); 8943 PetscCall(VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES)); 8944 } 8945 PetscCall(VecAssemblyBegin(gwork)); 8946 PetscCall(ISRestoreIndices(globalis,(const PetscInt**)&idxs)); 8947 PetscCall(PetscFree(vals)); 8948 PetscCall(VecAssemblyEnd(gwork)); 8949 /* now compute set in local ordering */ 8950 PetscCall(VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 8951 PetscCall(VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 8952 PetscCall(VecGetArrayRead(lwork,(const PetscScalar**)&vals)); 8953 PetscCall(VecGetSize(lwork,&n)); 8954 for (i=0,lsize=0;i<n;i++) { 8955 if (PetscRealPart(vals[i]) > 0.5) { 8956 lsize++; 8957 } 8958 } 8959 PetscCall(PetscMalloc1(lsize,&idxs)); 8960 for (i=0,lsize=0;i<n;i++) { 8961 if (PetscRealPart(vals[i]) > 0.5) { 8962 idxs[lsize++] = i; 8963 } 8964 } 8965 PetscCall(VecRestoreArrayRead(lwork,(const PetscScalar**)&vals)); 8966 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t)); 8967 *localis = localis_t; 8968 PetscFunctionReturn(0); 8969 } 8970 8971 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 8972 { 8973 PC_IS *pcis = (PC_IS*)pc->data; 8974 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8975 PC_IS *pcisf; 8976 PC_BDDC *pcbddcf; 8977 PC pcf; 8978 8979 PetscFunctionBegin; 8980 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc),&pcf)); 8981 PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)pcf)); 8982 PetscCall(PCSetOperators(pcf,pc->mat,pc->pmat)); 8983 PetscCall(PCSetType(pcf,PCBDDC)); 8984 8985 pcisf = (PC_IS*)pcf->data; 8986 pcbddcf = (PC_BDDC*)pcf->data; 8987 8988 pcisf->is_B_local = pcis->is_B_local; 8989 pcisf->vec1_N = pcis->vec1_N; 8990 pcisf->BtoNmap = pcis->BtoNmap; 8991 pcisf->n = pcis->n; 8992 pcisf->n_B = pcis->n_B; 8993 8994 PetscCall(PetscFree(pcbddcf->mat_graph)); 8995 PetscCall(PetscFree(pcbddcf->sub_schurs)); 8996 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 8997 pcbddcf->sub_schurs = schurs; 8998 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 8999 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 9000 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 9001 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 9002 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 9003 pcbddcf->use_faces = PETSC_TRUE; 9004 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 9005 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 9006 pcbddcf->use_qr_single = (PetscBool)!constraints; 9007 pcbddcf->fake_change = PETSC_TRUE; 9008 pcbddcf->dbg_flag = pcbddc->dbg_flag; 9009 9010 PetscCall(PCBDDCAdaptiveSelection(pcf)); 9011 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9012 9013 *change = pcbddcf->ConstraintMatrix; 9014 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat),pcbddcf->local_primal_size_cc,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,change_primal)); 9015 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat),pcbddcf->local_primal_size_cc,pcbddcf->local_primal_ref_mult,PETSC_COPY_VALUES,change_primal_mult)); 9016 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 9017 9018 if (schurs) pcbddcf->sub_schurs = NULL; 9019 pcbddcf->ConstraintMatrix = NULL; 9020 pcbddcf->mat_graph = NULL; 9021 pcisf->is_B_local = NULL; 9022 pcisf->vec1_N = NULL; 9023 pcisf->BtoNmap = NULL; 9024 PetscCall(PCDestroy(&pcf)); 9025 PetscFunctionReturn(0); 9026 } 9027 9028 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9029 { 9030 PC_IS *pcis=(PC_IS*)pc->data; 9031 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9032 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9033 Mat S_j; 9034 PetscInt *used_xadj,*used_adjncy; 9035 PetscBool free_used_adj; 9036 9037 PetscFunctionBegin; 9038 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9039 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9040 free_used_adj = PETSC_FALSE; 9041 if (pcbddc->sub_schurs_layers == -1) { 9042 used_xadj = NULL; 9043 used_adjncy = NULL; 9044 } else { 9045 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9046 used_xadj = pcbddc->mat_graph->xadj; 9047 used_adjncy = pcbddc->mat_graph->adjncy; 9048 } else if (pcbddc->computed_rowadj) { 9049 used_xadj = pcbddc->mat_graph->xadj; 9050 used_adjncy = pcbddc->mat_graph->adjncy; 9051 } else { 9052 PetscBool flg_row=PETSC_FALSE; 9053 const PetscInt *xadj,*adjncy; 9054 PetscInt nvtxs; 9055 9056 PetscCall(MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9057 if (flg_row) { 9058 PetscCall(PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy)); 9059 PetscCall(PetscArraycpy(used_xadj,xadj,nvtxs+1)); 9060 PetscCall(PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs])); 9061 free_used_adj = PETSC_TRUE; 9062 } else { 9063 pcbddc->sub_schurs_layers = -1; 9064 used_xadj = NULL; 9065 used_adjncy = NULL; 9066 } 9067 PetscCall(MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9068 } 9069 } 9070 9071 /* setup sub_schurs data */ 9072 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9073 if (!sub_schurs->schur_explicit) { 9074 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9075 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9076 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL)); 9077 } else { 9078 Mat change = NULL; 9079 Vec scaling = NULL; 9080 IS change_primal = NULL, iP; 9081 PetscInt benign_n; 9082 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9083 PetscBool need_change = PETSC_FALSE; 9084 PetscBool discrete_harmonic = PETSC_FALSE; 9085 9086 if (!pcbddc->use_vertices && reuse_solvers) { 9087 PetscInt n_vertices; 9088 9089 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 9090 reuse_solvers = (PetscBool)!n_vertices; 9091 } 9092 if (!pcbddc->benign_change_explicit) { 9093 benign_n = pcbddc->benign_n; 9094 } else { 9095 benign_n = 0; 9096 } 9097 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9098 We need a global reduction to avoid possible deadlocks. 9099 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9100 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9101 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9102 PetscCall(MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 9103 need_change = (PetscBool)(!need_change); 9104 } 9105 /* If the user defines additional constraints, we import them here */ 9106 if (need_change) { 9107 PetscCheck(!pcbddc->sub_schurs_rebuild,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9108 PetscCall(PCBDDCComputeFakeChange(pc,PETSC_FALSE,NULL,NULL,&change,&change_primal,NULL,&sub_schurs->change_with_qr)); 9109 9110 } 9111 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9112 9113 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP)); 9114 if (iP) { 9115 PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC"); 9116 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL)); 9117 PetscOptionsEnd(); 9118 } 9119 if (discrete_harmonic) { 9120 Mat A; 9121 PetscCall(MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A)); 9122 PetscCall(MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL)); 9123 PetscCall(PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP)); 9124 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9125 PetscCall(MatDestroy(&A)); 9126 } else { 9127 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9128 } 9129 PetscCall(MatDestroy(&change)); 9130 PetscCall(ISDestroy(&change_primal)); 9131 } 9132 PetscCall(MatDestroy(&S_j)); 9133 9134 /* free adjacency */ 9135 if (free_used_adj) PetscCall(PetscFree2(used_xadj,used_adjncy)); 9136 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9137 PetscFunctionReturn(0); 9138 } 9139 9140 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9141 { 9142 PC_IS *pcis=(PC_IS*)pc->data; 9143 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9144 PCBDDCGraph graph; 9145 9146 PetscFunctionBegin; 9147 /* attach interface graph for determining subsets */ 9148 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9149 IS verticesIS,verticescomm; 9150 PetscInt vsize,*idxs; 9151 9152 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9153 PetscCall(ISGetSize(verticesIS,&vsize)); 9154 PetscCall(ISGetIndices(verticesIS,(const PetscInt**)&idxs)); 9155 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm)); 9156 PetscCall(ISRestoreIndices(verticesIS,(const PetscInt**)&idxs)); 9157 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9158 PetscCall(PCBDDCGraphCreate(&graph)); 9159 PetscCall(PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount)); 9160 PetscCall(PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm)); 9161 PetscCall(ISDestroy(&verticescomm)); 9162 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9163 } else { 9164 graph = pcbddc->mat_graph; 9165 } 9166 /* print some info */ 9167 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9168 IS vertices; 9169 PetscInt nv,nedges,nfaces; 9170 PetscCall(PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 9171 PetscCall(PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9172 PetscCall(ISGetSize(vertices,&nv)); 9173 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9174 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 9175 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices)); 9176 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges)); 9177 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces)); 9178 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9179 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9180 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9181 } 9182 9183 /* sub_schurs init */ 9184 if (!pcbddc->sub_schurs) { 9185 PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9186 } 9187 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild,PETSC_FALSE)); 9188 9189 /* free graph struct */ 9190 if (pcbddc->sub_schurs_rebuild) { 9191 PetscCall(PCBDDCGraphDestroy(&graph)); 9192 } 9193 PetscFunctionReturn(0); 9194 } 9195 9196 PetscErrorCode PCBDDCCheckOperator(PC pc) 9197 { 9198 PC_IS *pcis=(PC_IS*)pc->data; 9199 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9200 9201 PetscFunctionBegin; 9202 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9203 IS zerodiag = NULL; 9204 Mat S_j,B0_B=NULL; 9205 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9206 PetscScalar *p0_check,*array,*array2; 9207 PetscReal norm; 9208 PetscInt i; 9209 9210 /* B0 and B0_B */ 9211 if (zerodiag) { 9212 IS dummy; 9213 9214 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy)); 9215 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 9216 PetscCall(MatCreateVecs(B0_B,NULL,&dummy_vec)); 9217 PetscCall(ISDestroy(&dummy)); 9218 } 9219 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9220 PetscCall(VecDuplicate(pcbddc->vec1_P,&vec_scale_P)); 9221 PetscCall(VecSet(pcbddc->vec1_P,1.0)); 9222 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9223 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9224 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9225 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9226 PetscCall(VecReciprocal(vec_scale_P)); 9227 /* S_j */ 9228 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9229 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9230 9231 /* mimic vector in \widetilde{W}_\Gamma */ 9232 PetscCall(VecSetRandom(pcis->vec1_N,NULL)); 9233 /* continuous in primal space */ 9234 PetscCall(VecSetRandom(pcbddc->coarse_vec,NULL)); 9235 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9236 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9237 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9238 PetscCall(PetscCalloc1(pcbddc->benign_n,&p0_check)); 9239 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9240 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9241 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9242 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9243 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9244 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9245 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9246 PetscCall(VecDuplicate(pcis->vec2_B,&vec_check_B)); 9247 PetscCall(VecCopy(pcis->vec2_B,vec_check_B)); 9248 9249 /* assemble rhs for coarse problem */ 9250 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9251 /* local with Schur */ 9252 PetscCall(MatMult(S_j,pcis->vec2_B,pcis->vec1_B)); 9253 if (zerodiag) { 9254 PetscCall(VecGetArray(dummy_vec,&array)); 9255 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9256 PetscCall(VecRestoreArray(dummy_vec,&array)); 9257 PetscCall(MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B)); 9258 } 9259 /* sum on primal nodes the local contributions */ 9260 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9261 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9262 PetscCall(VecGetArray(pcis->vec1_N,&array)); 9263 PetscCall(VecGetArray(pcbddc->vec1_P,&array2)); 9264 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9265 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array2)); 9266 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 9267 PetscCall(VecSet(pcbddc->coarse_vec,0.)); 9268 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9269 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9270 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9271 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9272 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9273 /* scale primal nodes (BDDC sums contibutions) */ 9274 PetscCall(VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P)); 9275 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9276 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9277 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9278 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9279 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9280 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9281 /* global: \widetilde{B0}_B w_\Gamma */ 9282 if (zerodiag) { 9283 PetscCall(MatMult(B0_B,pcis->vec2_B,dummy_vec)); 9284 PetscCall(VecGetArray(dummy_vec,&array)); 9285 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9286 PetscCall(VecRestoreArray(dummy_vec,&array)); 9287 } 9288 /* BDDC */ 9289 PetscCall(VecSet(pcis->vec1_D,0.)); 9290 PetscCall(PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE)); 9291 9292 PetscCall(VecCopy(pcis->vec1_B,pcis->vec2_B)); 9293 PetscCall(VecAXPY(pcis->vec1_B,-1.0,vec_check_B)); 9294 PetscCall(VecNorm(pcis->vec1_B,NORM_INFINITY,&norm)); 9295 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,(double)norm)); 9296 for (i=0;i<pcbddc->benign_n;i++) { 9297 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%" PetscInt_FMT "] error is %1.4e\n",PetscGlobalRank,i,(double)PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]))); 9298 } 9299 PetscCall(PetscFree(p0_check)); 9300 PetscCall(VecDestroy(&vec_scale_P)); 9301 PetscCall(VecDestroy(&vec_check_B)); 9302 PetscCall(VecDestroy(&dummy_vec)); 9303 PetscCall(MatDestroy(&S_j)); 9304 PetscCall(MatDestroy(&B0_B)); 9305 } 9306 PetscFunctionReturn(0); 9307 } 9308 9309 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9310 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9311 { 9312 Mat At; 9313 IS rows; 9314 PetscInt rst,ren; 9315 PetscLayout rmap; 9316 9317 PetscFunctionBegin; 9318 rst = ren = 0; 9319 if (ccomm != MPI_COMM_NULL) { 9320 PetscCall(PetscLayoutCreate(ccomm,&rmap)); 9321 PetscCall(PetscLayoutSetSize(rmap,A->rmap->N)); 9322 PetscCall(PetscLayoutSetBlockSize(rmap,1)); 9323 PetscCall(PetscLayoutSetUp(rmap)); 9324 PetscCall(PetscLayoutGetRange(rmap,&rst,&ren)); 9325 } 9326 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows)); 9327 PetscCall(MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At)); 9328 PetscCall(ISDestroy(&rows)); 9329 9330 if (ccomm != MPI_COMM_NULL) { 9331 Mat_MPIAIJ *a,*b; 9332 IS from,to; 9333 Vec gvec; 9334 PetscInt lsize; 9335 9336 PetscCall(MatCreate(ccomm,B)); 9337 PetscCall(MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N)); 9338 PetscCall(MatSetType(*B,MATAIJ)); 9339 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 9340 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9341 a = (Mat_MPIAIJ*)At->data; 9342 b = (Mat_MPIAIJ*)(*B)->data; 9343 PetscCallMPI(MPI_Comm_size(ccomm,&b->size)); 9344 PetscCallMPI(MPI_Comm_rank(ccomm,&b->rank)); 9345 PetscCall(PetscObjectReference((PetscObject)a->A)); 9346 PetscCall(PetscObjectReference((PetscObject)a->B)); 9347 b->A = a->A; 9348 b->B = a->B; 9349 9350 b->donotstash = a->donotstash; 9351 b->roworiented = a->roworiented; 9352 b->rowindices = NULL; 9353 b->rowvalues = NULL; 9354 b->getrowactive = PETSC_FALSE; 9355 9356 (*B)->rmap = rmap; 9357 (*B)->factortype = A->factortype; 9358 (*B)->assembled = PETSC_TRUE; 9359 (*B)->insertmode = NOT_SET_VALUES; 9360 (*B)->preallocated = PETSC_TRUE; 9361 9362 if (a->colmap) { 9363 #if defined(PETSC_USE_CTABLE) 9364 PetscCall(PetscTableCreateCopy(a->colmap,&b->colmap)); 9365 #else 9366 PetscCall(PetscMalloc1(At->cmap->N,&b->colmap)); 9367 PetscCall(PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt))); 9368 PetscCall(PetscArraycpy(b->colmap,a->colmap,At->cmap->N)); 9369 #endif 9370 } else b->colmap = NULL; 9371 if (a->garray) { 9372 PetscInt len; 9373 len = a->B->cmap->n; 9374 PetscCall(PetscMalloc1(len+1,&b->garray)); 9375 PetscCall(PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt))); 9376 if (len) PetscCall(PetscArraycpy(b->garray,a->garray,len)); 9377 } else b->garray = NULL; 9378 9379 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9380 b->lvec = a->lvec; 9381 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec)); 9382 9383 /* cannot use VecScatterCopy */ 9384 PetscCall(VecGetLocalSize(b->lvec,&lsize)); 9385 PetscCall(ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from)); 9386 PetscCall(ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to)); 9387 PetscCall(MatCreateVecs(*B,&gvec,NULL)); 9388 PetscCall(VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx)); 9389 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx)); 9390 PetscCall(ISDestroy(&from)); 9391 PetscCall(ISDestroy(&to)); 9392 PetscCall(VecDestroy(&gvec)); 9393 } 9394 PetscCall(MatDestroy(&At)); 9395 PetscFunctionReturn(0); 9396 } 9397