1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 #if defined(PETSC_USE_COMPLEX) 22 PetscReal *rwork2; 23 #endif 24 25 PetscFunctionBegin; 26 PetscCall(MatGetSize(A,&nr,&nc)); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 PetscCall(PetscMalloc1(ulw,&uwork)); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 PetscCall(PetscMalloc1(n,&sing)); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 PetscCall(PetscMalloc1(nr*nr,&U)); 46 PetscCall(PetscBLASIntCast(nr,&bM)); 47 PetscCall(PetscBLASIntCast(nc,&bN)); 48 PetscCall(PetscBLASIntCast(ulw,&lwork)); 49 PetscCall(MatDenseGetArray(A,&data)); 50 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 #if !defined(PETSC_USE_COMPLEX) 52 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 53 #else 54 PetscCall(PetscMalloc1(5*n,&rwork2)); 55 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr)); 56 PetscCall(PetscFree(rwork2)); 57 #endif 58 PetscCall(PetscFPTrapPop()); 59 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 60 PetscCall(MatDenseRestoreArray(A,&data)); 61 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 62 if (!rwork) { 63 PetscCall(PetscFree(sing)); 64 } 65 if (!work) { 66 PetscCall(PetscFree(uwork)); 67 } 68 /* create B */ 69 if (!range) { 70 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B)); 71 PetscCall(MatDenseGetArray(*B,&data)); 72 PetscCall(PetscArraycpy(data,U+nr*i,(nr-i)*nr)); 73 } else { 74 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B)); 75 PetscCall(MatDenseGetArray(*B,&data)); 76 PetscCall(PetscArraycpy(data,U,i*nr)); 77 } 78 PetscCall(MatDenseRestoreArray(*B,&data)); 79 PetscCall(PetscFree(U)); 80 PetscFunctionReturn(0); 81 } 82 83 /* TODO REMOVE */ 84 #if defined(PRINT_GDET) 85 static int inc = 0; 86 static int lev = 0; 87 #endif 88 89 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 90 { 91 Mat GE,GEd; 92 PetscInt rsize,csize,esize; 93 PetscScalar *ptr; 94 95 PetscFunctionBegin; 96 PetscCall(ISGetSize(edge,&esize)); 97 if (!esize) PetscFunctionReturn(0); 98 PetscCall(ISGetSize(extrow,&rsize)); 99 PetscCall(ISGetSize(extcol,&csize)); 100 101 /* gradients */ 102 ptr = work + 5*esize; 103 PetscCall(MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE)); 104 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins)); 105 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins)); 106 PetscCall(MatDestroy(&GE)); 107 108 /* constants */ 109 ptr += rsize*csize; 110 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd)); 111 PetscCall(MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE)); 112 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd)); 113 PetscCall(MatDestroy(&GE)); 114 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins)); 115 PetscCall(MatDestroy(&GEd)); 116 117 if (corners) { 118 Mat GEc; 119 const PetscScalar *vals; 120 PetscScalar v; 121 122 PetscCall(MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc)); 123 PetscCall(MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd)); 124 PetscCall(MatDenseGetArrayRead(GEd,&vals)); 125 /* v = PetscAbsScalar(vals[0]) */; 126 v = 1.; 127 cvals[0] = vals[0]/v; 128 cvals[1] = vals[1]/v; 129 PetscCall(MatDenseRestoreArrayRead(GEd,&vals)); 130 PetscCall(MatScale(*GKins,1./v)); 131 #if defined(PRINT_GDET) 132 { 133 PetscViewer viewer; 134 char filename[256]; 135 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 136 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 137 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 138 PetscCall(PetscObjectSetName((PetscObject)GEc,"GEc")); 139 PetscCall(MatView(GEc,viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)(*GKins),"GK")); 141 PetscCall(MatView(*GKins,viewer)); 142 PetscCall(PetscObjectSetName((PetscObject)GEd,"Gproj")); 143 PetscCall(MatView(GEd,viewer)); 144 PetscCall(PetscViewerDestroy(&viewer)); 145 } 146 #endif 147 PetscCall(MatDestroy(&GEd)); 148 PetscCall(MatDestroy(&GEc)); 149 } 150 151 PetscFunctionReturn(0); 152 } 153 154 PetscErrorCode PCBDDCNedelecSupport(PC pc) 155 { 156 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 157 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 158 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 159 Vec tvec; 160 PetscSF sfv; 161 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 162 MPI_Comm comm; 163 IS lned,primals,allprimals,nedfieldlocal; 164 IS *eedges,*extrows,*extcols,*alleedges; 165 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 166 PetscScalar *vals,*work; 167 PetscReal *rwork; 168 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 169 PetscInt ne,nv,Lv,order,n,field; 170 PetscInt n_neigh,*neigh,*n_shared,**shared; 171 PetscInt i,j,extmem,cum,maxsize,nee; 172 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 173 PetscInt *sfvleaves,*sfvroots; 174 PetscInt *corners,*cedges; 175 PetscInt *ecount,**eneighs,*vcount,**vneighs; 176 PetscInt *emarks; 177 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 178 179 PetscFunctionBegin; 180 /* If the discrete gradient is defined for a subset of dofs and global is true, 181 it assumes G is given in global ordering for all the dofs. 182 Otherwise, the ordering is global for the Nedelec field */ 183 order = pcbddc->nedorder; 184 conforming = pcbddc->conforming; 185 field = pcbddc->nedfield; 186 global = pcbddc->nedglobal; 187 setprimal = PETSC_FALSE; 188 print = PETSC_FALSE; 189 singular = PETSC_FALSE; 190 191 /* Command line customization */ 192 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC"); 193 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL)); 194 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL)); 195 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL)); 196 /* print debug info TODO: to be removed */ 197 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL)); 198 PetscOptionsEnd(); 199 200 /* Return if there are no edges in the decomposition and the problem is not singular */ 201 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&al2g,NULL)); 202 PetscCall(ISLocalToGlobalMappingGetSize(al2g,&n)); 203 PetscCall(PetscObjectGetComm((PetscObject)pc,&comm)); 204 if (!singular) { 205 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 206 lrc[0] = PETSC_FALSE; 207 for (i=0;i<n;i++) { 208 if (PetscRealPart(vals[i]) > 2.) { 209 lrc[0] = PETSC_TRUE; 210 break; 211 } 212 } 213 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 214 PetscCall(MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm)); 215 if (!lrc[1]) PetscFunctionReturn(0); 216 } 217 218 /* Get Nedelec field */ 219 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal,comm,PETSC_ERR_USER,"Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT,field,pcbddc->n_ISForDofsLocal); 220 if (pcbddc->n_ISForDofsLocal && field >= 0) { 221 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 222 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 223 PetscCall(ISGetLocalSize(nedfieldlocal,&ne)); 224 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 225 ne = n; 226 nedfieldlocal = NULL; 227 global = PETSC_TRUE; 228 } else if (field == PETSC_DECIDE) { 229 PetscInt rst,ren,*idx; 230 231 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 232 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 233 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren)); 234 for (i=rst;i<ren;i++) { 235 PetscInt nc; 236 237 PetscCall(MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 238 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 239 PetscCall(MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 240 } 241 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 242 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 243 PetscCall(PetscMalloc1(n,&idx)); 244 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 245 PetscCall(ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal)); 246 } else { 247 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 248 } 249 250 /* Sanity checks */ 251 PetscCheck(order || conforming,comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 252 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix,comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 253 PetscCheck(!order || (ne%order == 0),PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT,ne,order); 254 255 /* Just set primal dofs and return */ 256 if (setprimal) { 257 IS enedfieldlocal; 258 PetscInt *eidxs; 259 260 PetscCall(PetscMalloc1(ne,&eidxs)); 261 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 262 if (nedfieldlocal) { 263 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 264 for (i=0,cum=0;i<ne;i++) { 265 if (PetscRealPart(vals[idxs[i]]) > 2.) { 266 eidxs[cum++] = idxs[i]; 267 } 268 } 269 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 270 } else { 271 for (i=0,cum=0;i<ne;i++) { 272 if (PetscRealPart(vals[i]) > 2.) { 273 eidxs[cum++] = i; 274 } 275 } 276 } 277 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 278 PetscCall(ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal)); 279 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal)); 280 PetscCall(PetscFree(eidxs)); 281 PetscCall(ISDestroy(&nedfieldlocal)); 282 PetscCall(ISDestroy(&enedfieldlocal)); 283 PetscFunctionReturn(0); 284 } 285 286 /* Compute some l2g maps */ 287 if (nedfieldlocal) { 288 IS is; 289 290 /* need to map from the local Nedelec field to local numbering */ 291 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g)); 292 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 293 PetscCall(ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is)); 294 PetscCall(ISLocalToGlobalMappingCreateIS(is,&al2g)); 295 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 296 if (global) { 297 PetscCall(PetscObjectReference((PetscObject)al2g)); 298 el2g = al2g; 299 } else { 300 IS gis; 301 302 PetscCall(ISRenumber(is,NULL,NULL,&gis)); 303 PetscCall(ISLocalToGlobalMappingCreateIS(gis,&el2g)); 304 PetscCall(ISDestroy(&gis)); 305 } 306 PetscCall(ISDestroy(&is)); 307 } else { 308 /* restore default */ 309 pcbddc->nedfield = -1; 310 /* one ref for the destruction of al2g, one for el2g */ 311 PetscCall(PetscObjectReference((PetscObject)al2g)); 312 PetscCall(PetscObjectReference((PetscObject)al2g)); 313 el2g = al2g; 314 fl2g = NULL; 315 } 316 317 /* Start communication to drop connections for interior edges (for cc analysis only) */ 318 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 319 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 320 if (nedfieldlocal) { 321 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 322 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 323 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 324 } else { 325 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 326 } 327 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 328 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 329 330 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 331 PetscCall(MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G)); 332 PetscCall(MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 333 if (global) { 334 PetscInt rst; 335 336 PetscCall(MatGetOwnershipRange(G,&rst,NULL)); 337 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 338 if (matis->sf_rootdata[i] < 2) { 339 matis->sf_rootdata[cum++] = i + rst; 340 } 341 } 342 PetscCall(MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE)); 343 PetscCall(MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL)); 344 } else { 345 PetscInt *tbz; 346 347 PetscCall(PetscMalloc1(ne,&tbz)); 348 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 349 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 350 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 351 for (i=0,cum=0;i<ne;i++) 352 if (matis->sf_leafdata[idxs[i]] == 1) 353 tbz[cum++] = i; 354 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 355 PetscCall(ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz)); 356 PetscCall(MatZeroRows(G,cum,tbz,0.,NULL,NULL)); 357 PetscCall(PetscFree(tbz)); 358 } 359 } else { /* we need the entire G to infer the nullspace */ 360 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 361 G = pcbddc->discretegradient; 362 } 363 364 /* Extract subdomain relevant rows of G */ 365 PetscCall(ISLocalToGlobalMappingGetIndices(el2g,&idxs)); 366 PetscCall(ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned)); 367 PetscCall(MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall)); 368 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g,&idxs)); 369 PetscCall(ISDestroy(&lned)); 370 PetscCall(MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis)); 371 PetscCall(MatDestroy(&lGall)); 372 PetscCall(MatISGetLocalMat(lGis,&lG)); 373 374 /* SF for nodal dofs communications */ 375 PetscCall(MatGetLocalSize(G,NULL,&Lv)); 376 PetscCall(MatISGetLocalToGlobalMapping(lGis,NULL,&vl2g)); 377 PetscCall(PetscObjectReference((PetscObject)vl2g)); 378 PetscCall(ISLocalToGlobalMappingGetSize(vl2g,&nv)); 379 PetscCall(PetscSFCreate(comm,&sfv)); 380 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g,&idxs)); 381 PetscCall(PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs)); 382 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs)); 383 i = singular ? 2 : 1; 384 PetscCall(PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots)); 385 386 /* Destroy temporary G created in MATIS format and modified G */ 387 PetscCall(PetscObjectReference((PetscObject)lG)); 388 PetscCall(MatDestroy(&lGis)); 389 PetscCall(MatDestroy(&G)); 390 391 if (print) { 392 PetscCall(PetscObjectSetName((PetscObject)lG,"initial_lG")); 393 PetscCall(MatView(lG,NULL)); 394 } 395 396 /* Save lG for values insertion in change of basis */ 397 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGinit)); 398 399 /* Analyze the edge-nodes connections (duplicate lG) */ 400 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGe)); 401 PetscCall(MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 402 PetscCall(PetscBTCreate(nv,&btv)); 403 PetscCall(PetscBTCreate(ne,&bte)); 404 PetscCall(PetscBTCreate(ne,&btb)); 405 PetscCall(PetscBTCreate(ne,&btbd)); 406 PetscCall(PetscBTCreate(nv,&btvcand)); 407 /* need to import the boundary specification to ensure the 408 proper detection of coarse edges' endpoints */ 409 if (pcbddc->DirichletBoundariesLocal) { 410 IS is; 411 412 if (fl2g) { 413 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is)); 414 } else { 415 is = pcbddc->DirichletBoundariesLocal; 416 } 417 PetscCall(ISGetLocalSize(is,&cum)); 418 PetscCall(ISGetIndices(is,&idxs)); 419 for (i=0;i<cum;i++) { 420 if (idxs[i] >= 0) { 421 PetscCall(PetscBTSet(btb,idxs[i])); 422 PetscCall(PetscBTSet(btbd,idxs[i])); 423 } 424 } 425 PetscCall(ISRestoreIndices(is,&idxs)); 426 if (fl2g) { 427 PetscCall(ISDestroy(&is)); 428 } 429 } 430 if (pcbddc->NeumannBoundariesLocal) { 431 IS is; 432 433 if (fl2g) { 434 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is)); 435 } else { 436 is = pcbddc->NeumannBoundariesLocal; 437 } 438 PetscCall(ISGetLocalSize(is,&cum)); 439 PetscCall(ISGetIndices(is,&idxs)); 440 for (i=0;i<cum;i++) { 441 if (idxs[i] >= 0) { 442 PetscCall(PetscBTSet(btb,idxs[i])); 443 } 444 } 445 PetscCall(ISRestoreIndices(is,&idxs)); 446 if (fl2g) { 447 PetscCall(ISDestroy(&is)); 448 } 449 } 450 451 /* Count neighs per dof */ 452 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs)); 453 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs)); 454 455 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 456 for proper detection of coarse edges' endpoints */ 457 PetscCall(PetscBTCreate(ne,&btee)); 458 for (i=0;i<ne;i++) { 459 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 460 PetscCall(PetscBTSet(btee,i)); 461 } 462 } 463 PetscCall(PetscMalloc1(ne,&marks)); 464 if (!conforming) { 465 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 466 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 467 } 468 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 469 PetscCall(MatSeqAIJGetArray(lGe,&vals)); 470 cum = 0; 471 for (i=0;i<ne;i++) { 472 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 473 if (!PetscBTLookup(btee,i)) { 474 marks[cum++] = i; 475 continue; 476 } 477 /* set badly connected edge dofs as primal */ 478 if (!conforming) { 479 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 480 marks[cum++] = i; 481 PetscCall(PetscBTSet(bte,i)); 482 for (j=ii[i];j<ii[i+1];j++) { 483 PetscCall(PetscBTSet(btv,jj[j])); 484 } 485 } else { 486 /* every edge dofs should be connected trough a certain number of nodal dofs 487 to other edge dofs belonging to coarse edges 488 - at most 2 endpoints 489 - order-1 interior nodal dofs 490 - no undefined nodal dofs (nconn < order) 491 */ 492 PetscInt ends = 0,ints = 0, undef = 0; 493 for (j=ii[i];j<ii[i+1];j++) { 494 PetscInt v = jj[j],k; 495 PetscInt nconn = iit[v+1]-iit[v]; 496 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 497 if (nconn > order) ends++; 498 else if (nconn == order) ints++; 499 else undef++; 500 } 501 if (undef || ends > 2 || ints != order -1) { 502 marks[cum++] = i; 503 PetscCall(PetscBTSet(bte,i)); 504 for (j=ii[i];j<ii[i+1];j++) { 505 PetscCall(PetscBTSet(btv,jj[j])); 506 } 507 } 508 } 509 } 510 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 511 if (!order && ii[i+1] != ii[i]) { 512 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 513 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 514 } 515 } 516 PetscCall(PetscBTDestroy(&btee)); 517 PetscCall(MatSeqAIJRestoreArray(lGe,&vals)); 518 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 519 if (!conforming) { 520 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 521 PetscCall(MatDestroy(&lGt)); 522 } 523 PetscCall(MatZeroRows(lGe,cum,marks,0.,NULL,NULL)); 524 525 /* identify splitpoints and corner candidates */ 526 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 527 if (print) { 528 PetscCall(PetscObjectSetName((PetscObject)lGe,"edgerestr_lG")); 529 PetscCall(MatView(lGe,NULL)); 530 PetscCall(PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt")); 531 PetscCall(MatView(lGt,NULL)); 532 } 533 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 534 PetscCall(MatSeqAIJGetArray(lGt,&vals)); 535 for (i=0;i<nv;i++) { 536 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 537 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 538 if (!order) { /* variable order */ 539 PetscReal vorder = 0.; 540 541 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 542 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 543 PetscCheck(vorder-test <= PETSC_SQRT_MACHINE_EPSILON,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%" PetscInt_FMT ")",(double)vorder,test); 544 ord = 1; 545 } 546 PetscAssert(test%ord == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT,test,i,ord); 547 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 548 if (PetscBTLookup(btbd,jj[j])) { 549 bdir = PETSC_TRUE; 550 break; 551 } 552 if (vc != ecount[jj[j]]) { 553 sneighs = PETSC_FALSE; 554 } else { 555 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 556 for (k=0;k<vc;k++) { 557 if (vn[k] != en[k]) { 558 sneighs = PETSC_FALSE; 559 break; 560 } 561 } 562 } 563 } 564 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 565 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n",i,PetscBools[!sneighs],PetscBools[test >= 3*ord],PetscBools[bdir]); 566 PetscCall(PetscBTSet(btv,i)); 567 } else if (test == ord) { 568 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 569 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %" PetscInt_FMT "\n",i); 570 PetscCall(PetscBTSet(btv,i)); 571 } else { 572 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %" PetscInt_FMT "\n",i); 573 PetscCall(PetscBTSet(btvcand,i)); 574 } 575 } 576 } 577 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs)); 578 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs)); 579 PetscCall(PetscBTDestroy(&btbd)); 580 581 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 582 if (order != 1) { 583 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 584 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 585 for (i=0;i<nv;i++) { 586 if (PetscBTLookup(btvcand,i)) { 587 PetscBool found = PETSC_FALSE; 588 for (j=ii[i];j<ii[i+1] && !found;j++) { 589 PetscInt k,e = jj[j]; 590 if (PetscBTLookup(bte,e)) continue; 591 for (k=iit[e];k<iit[e+1];k++) { 592 PetscInt v = jjt[k]; 593 if (v != i && PetscBTLookup(btvcand,v)) { 594 found = PETSC_TRUE; 595 break; 596 } 597 } 598 } 599 if (!found) { 600 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %" PetscInt_FMT " CLEARED\n",i); 601 PetscCall(PetscBTClear(btvcand,i)); 602 } else { 603 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %" PetscInt_FMT " ACCEPTED\n",i); 604 } 605 } 606 } 607 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 608 } 609 PetscCall(MatSeqAIJRestoreArray(lGt,&vals)); 610 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 611 PetscCall(MatDestroy(&lGe)); 612 613 /* Get the local G^T explicitly */ 614 PetscCall(MatDestroy(&lGt)); 615 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 616 PetscCall(MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 617 618 /* Mark interior nodal dofs */ 619 PetscCall(ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 620 PetscCall(PetscBTCreate(nv,&btvi)); 621 for (i=1;i<n_neigh;i++) { 622 for (j=0;j<n_shared[i];j++) { 623 PetscCall(PetscBTSet(btvi,shared[i][j])); 624 } 625 } 626 PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 627 628 /* communicate corners and splitpoints */ 629 PetscCall(PetscMalloc1(nv,&vmarks)); 630 PetscCall(PetscArrayzero(sfvleaves,nv)); 631 PetscCall(PetscArrayzero(sfvroots,Lv)); 632 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 633 634 if (print) { 635 IS tbz; 636 637 cum = 0; 638 for (i=0;i<nv;i++) 639 if (sfvleaves[i]) 640 vmarks[cum++] = i; 641 642 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 643 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local")); 644 PetscCall(ISView(tbz,NULL)); 645 PetscCall(ISDestroy(&tbz)); 646 } 647 648 PetscCall(PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 649 PetscCall(PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 650 PetscCall(PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 651 PetscCall(PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 652 653 /* Zero rows of lGt corresponding to identified corners 654 and interior nodal dofs */ 655 cum = 0; 656 for (i=0;i<nv;i++) { 657 if (sfvleaves[i]) { 658 vmarks[cum++] = i; 659 PetscCall(PetscBTSet(btv,i)); 660 } 661 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 662 } 663 PetscCall(PetscBTDestroy(&btvi)); 664 if (print) { 665 IS tbz; 666 667 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 668 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior")); 669 PetscCall(ISView(tbz,NULL)); 670 PetscCall(ISDestroy(&tbz)); 671 } 672 PetscCall(MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL)); 673 PetscCall(PetscFree(vmarks)); 674 PetscCall(PetscSFDestroy(&sfv)); 675 PetscCall(PetscFree2(sfvleaves,sfvroots)); 676 677 /* Recompute G */ 678 PetscCall(MatDestroy(&lG)); 679 PetscCall(MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG)); 680 if (print) { 681 PetscCall(PetscObjectSetName((PetscObject)lG,"used_lG")); 682 PetscCall(MatView(lG,NULL)); 683 PetscCall(PetscObjectSetName((PetscObject)lGt,"used_lGt")); 684 PetscCall(MatView(lGt,NULL)); 685 } 686 687 /* Get primal dofs (if any) */ 688 cum = 0; 689 for (i=0;i<ne;i++) { 690 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 691 } 692 if (fl2g) { 693 PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,marks,marks)); 694 } 695 PetscCall(ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals)); 696 if (print) { 697 PetscCall(PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs")); 698 PetscCall(ISView(primals,NULL)); 699 } 700 PetscCall(PetscBTDestroy(&bte)); 701 /* TODO: what if the user passed in some of them ? */ 702 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 703 PetscCall(ISDestroy(&primals)); 704 705 /* Compute edge connectivity */ 706 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_")); 707 708 /* Symbolic conn = lG*lGt */ 709 PetscCall(MatProductCreate(lG,lGt,NULL,&conn)); 710 PetscCall(MatProductSetType(conn,MATPRODUCT_AB)); 711 PetscCall(MatProductSetAlgorithm(conn,"default")); 712 PetscCall(MatProductSetFill(conn,PETSC_DEFAULT)); 713 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_")); 714 PetscCall(MatProductSetFromOptions(conn)); 715 PetscCall(MatProductSymbolic(conn)); 716 717 PetscCall(MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 718 if (fl2g) { 719 PetscBT btf; 720 PetscInt *iia,*jja,*iiu,*jju; 721 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 722 723 /* create CSR for all local dofs */ 724 PetscCall(PetscMalloc1(n+1,&iia)); 725 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 726 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT,pcbddc->mat_graph->nvtxs_csr,n); 727 iiu = pcbddc->mat_graph->xadj; 728 jju = pcbddc->mat_graph->adjncy; 729 } else if (pcbddc->use_local_adj) { 730 rest = PETSC_TRUE; 731 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 732 } else { 733 free = PETSC_TRUE; 734 PetscCall(PetscMalloc2(n+1,&iiu,n,&jju)); 735 iiu[0] = 0; 736 for (i=0;i<n;i++) { 737 iiu[i+1] = i+1; 738 jju[i] = -1; 739 } 740 } 741 742 /* import sizes of CSR */ 743 iia[0] = 0; 744 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 745 746 /* overwrite entries corresponding to the Nedelec field */ 747 PetscCall(PetscBTCreate(n,&btf)); 748 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 749 for (i=0;i<ne;i++) { 750 PetscCall(PetscBTSet(btf,idxs[i])); 751 iia[idxs[i]+1] = ii[i+1]-ii[i]; 752 } 753 754 /* iia in CSR */ 755 for (i=0;i<n;i++) iia[i+1] += iia[i]; 756 757 /* jja in CSR */ 758 PetscCall(PetscMalloc1(iia[n],&jja)); 759 for (i=0;i<n;i++) 760 if (!PetscBTLookup(btf,i)) 761 for (j=0;j<iiu[i+1]-iiu[i];j++) 762 jja[iia[i]+j] = jju[iiu[i]+j]; 763 764 /* map edge dofs connectivity */ 765 if (jj) { 766 PetscCall(ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj)); 767 for (i=0;i<ne;i++) { 768 PetscInt e = idxs[i]; 769 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 770 } 771 } 772 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 773 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER)); 774 if (rest) { 775 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 776 } 777 if (free) { 778 PetscCall(PetscFree2(iiu,jju)); 779 } 780 PetscCall(PetscBTDestroy(&btf)); 781 } else { 782 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER)); 783 } 784 785 /* Analyze interface for edge dofs */ 786 PetscCall(PCBDDCAnalyzeInterface(pc)); 787 pcbddc->mat_graph->twodim = PETSC_FALSE; 788 789 /* Get coarse edges in the edge space */ 790 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 791 PetscCall(MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 792 793 if (fl2g) { 794 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 795 PetscCall(PetscMalloc1(nee,&eedges)); 796 for (i=0;i<nee;i++) { 797 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 798 } 799 } else { 800 eedges = alleedges; 801 primals = allprimals; 802 } 803 804 /* Mark fine edge dofs with their coarse edge id */ 805 PetscCall(PetscArrayzero(marks,ne)); 806 PetscCall(ISGetLocalSize(primals,&cum)); 807 PetscCall(ISGetIndices(primals,&idxs)); 808 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 809 PetscCall(ISRestoreIndices(primals,&idxs)); 810 if (print) { 811 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs")); 812 PetscCall(ISView(primals,NULL)); 813 } 814 815 maxsize = 0; 816 for (i=0;i<nee;i++) { 817 PetscInt size,mark = i+1; 818 819 PetscCall(ISGetLocalSize(eedges[i],&size)); 820 PetscCall(ISGetIndices(eedges[i],&idxs)); 821 for (j=0;j<size;j++) marks[idxs[j]] = mark; 822 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 823 maxsize = PetscMax(maxsize,size); 824 } 825 826 /* Find coarse edge endpoints */ 827 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 828 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 829 for (i=0;i<nee;i++) { 830 PetscInt mark = i+1,size; 831 832 PetscCall(ISGetLocalSize(eedges[i],&size)); 833 if (!size && nedfieldlocal) continue; 834 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 835 PetscCall(ISGetIndices(eedges[i],&idxs)); 836 if (print) { 837 PetscCall(PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n",i)); 838 PetscCall(ISView(eedges[i],NULL)); 839 } 840 for (j=0;j<size;j++) { 841 PetscInt k, ee = idxs[j]; 842 if (print) PetscPrintf(PETSC_COMM_SELF," idx %" PetscInt_FMT "\n",ee); 843 for (k=ii[ee];k<ii[ee+1];k++) { 844 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %" PetscInt_FMT "\n",jj[k]); 845 if (PetscBTLookup(btv,jj[k])) { 846 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %" PetscInt_FMT "\n",jj[k]); 847 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 848 PetscInt k2; 849 PetscBool corner = PETSC_FALSE; 850 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 851 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,(int)!!PetscBTLookup(btb,jjt[k2])); 852 /* it's a corner if either is connected with an edge dof belonging to a different cc or 853 if the edge dof lie on the natural part of the boundary */ 854 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 855 corner = PETSC_TRUE; 856 break; 857 } 858 } 859 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 860 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %" PetscInt_FMT "\n",jj[k]); 861 PetscCall(PetscBTSet(btv,jj[k])); 862 } else { 863 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 864 } 865 } 866 } 867 } 868 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 869 } 870 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 871 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 872 PetscCall(PetscBTDestroy(&btb)); 873 874 /* Reset marked primal dofs */ 875 PetscCall(ISGetLocalSize(primals,&cum)); 876 PetscCall(ISGetIndices(primals,&idxs)); 877 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 878 PetscCall(ISRestoreIndices(primals,&idxs)); 879 880 /* Now use the initial lG */ 881 PetscCall(MatDestroy(&lG)); 882 PetscCall(MatDestroy(&lGt)); 883 lG = lGinit; 884 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 885 886 /* Compute extended cols indices */ 887 PetscCall(PetscBTCreate(nv,&btvc)); 888 PetscCall(PetscBTCreate(nee,&bter)); 889 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 890 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG,&i)); 891 i *= maxsize; 892 PetscCall(PetscCalloc1(nee,&extcols)); 893 PetscCall(PetscMalloc2(i,&extrow,i,&gidxs)); 894 eerr = PETSC_FALSE; 895 for (i=0;i<nee;i++) { 896 PetscInt size,found = 0; 897 898 cum = 0; 899 PetscCall(ISGetLocalSize(eedges[i],&size)); 900 if (!size && nedfieldlocal) continue; 901 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 902 PetscCall(ISGetIndices(eedges[i],&idxs)); 903 PetscCall(PetscBTMemzero(nv,btvc)); 904 for (j=0;j<size;j++) { 905 PetscInt k,ee = idxs[j]; 906 for (k=ii[ee];k<ii[ee+1];k++) { 907 PetscInt vv = jj[k]; 908 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 909 else if (!PetscBTLookupSet(btvc,vv)) found++; 910 } 911 } 912 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 913 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 914 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 915 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 916 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 917 /* it may happen that endpoints are not defined at this point 918 if it is the case, mark this edge for a second pass */ 919 if (cum != size -1 || found != 2) { 920 PetscCall(PetscBTSet(bter,i)); 921 if (print) { 922 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge")); 923 PetscCall(ISView(eedges[i],NULL)); 924 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol")); 925 PetscCall(ISView(extcols[i],NULL)); 926 } 927 eerr = PETSC_TRUE; 928 } 929 } 930 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 931 PetscCall(MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm)); 932 if (done) { 933 PetscInt *newprimals; 934 935 PetscCall(PetscMalloc1(ne,&newprimals)); 936 PetscCall(ISGetLocalSize(primals,&cum)); 937 PetscCall(ISGetIndices(primals,&idxs)); 938 PetscCall(PetscArraycpy(newprimals,idxs,cum)); 939 PetscCall(ISRestoreIndices(primals,&idxs)); 940 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 941 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %s)\n",PetscBools[eerr]); 942 for (i=0;i<nee;i++) { 943 PetscBool has_candidates = PETSC_FALSE; 944 if (PetscBTLookup(bter,i)) { 945 PetscInt size,mark = i+1; 946 947 PetscCall(ISGetLocalSize(eedges[i],&size)); 948 PetscCall(ISGetIndices(eedges[i],&idxs)); 949 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 950 for (j=0;j<size;j++) { 951 PetscInt k,ee = idxs[j]; 952 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n",ee,ii[ee],ii[ee+1]); 953 for (k=ii[ee];k<ii[ee+1];k++) { 954 /* set all candidates located on the edge as corners */ 955 if (PetscBTLookup(btvcand,jj[k])) { 956 PetscInt k2,vv = jj[k]; 957 has_candidates = PETSC_TRUE; 958 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %" PetscInt_FMT "\n",vv); 959 PetscCall(PetscBTSet(btv,vv)); 960 /* set all edge dofs connected to candidate as primals */ 961 for (k2=iit[vv];k2<iit[vv+1];k2++) { 962 if (marks[jjt[k2]] == mark) { 963 PetscInt k3,ee2 = jjt[k2]; 964 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %" PetscInt_FMT "\n",ee2); 965 newprimals[cum++] = ee2; 966 /* finally set the new corners */ 967 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 968 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %" PetscInt_FMT "\n",jj[k3]); 969 PetscCall(PetscBTSet(btv,jj[k3])); 970 } 971 } 972 } 973 } else { 974 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %" PetscInt_FMT "\n",jj[k]); 975 } 976 } 977 } 978 if (!has_candidates) { /* circular edge */ 979 PetscInt k, ee = idxs[0],*tmarks; 980 981 PetscCall(PetscCalloc1(ne,&tmarks)); 982 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %" PetscInt_FMT "\n",i); 983 for (k=ii[ee];k<ii[ee+1];k++) { 984 PetscInt k2; 985 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %" PetscInt_FMT "\n",jj[k]); 986 PetscCall(PetscBTSet(btv,jj[k])); 987 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 988 } 989 for (j=0;j<size;j++) { 990 if (tmarks[idxs[j]] > 1) { 991 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %" PetscInt_FMT "\n",idxs[j]); 992 newprimals[cum++] = idxs[j]; 993 } 994 } 995 PetscCall(PetscFree(tmarks)); 996 } 997 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 998 } 999 PetscCall(ISDestroy(&extcols[i])); 1000 } 1001 PetscCall(PetscFree(extcols)); 1002 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 1003 PetscCall(PetscSortRemoveDupsInt(&cum,newprimals)); 1004 if (fl2g) { 1005 PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals)); 1006 PetscCall(ISDestroy(&primals)); 1007 for (i=0;i<nee;i++) { 1008 PetscCall(ISDestroy(&eedges[i])); 1009 } 1010 PetscCall(PetscFree(eedges)); 1011 } 1012 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1013 PetscCall(ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals)); 1014 PetscCall(PetscFree(newprimals)); 1015 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 1016 PetscCall(ISDestroy(&primals)); 1017 PetscCall(PCBDDCAnalyzeInterface(pc)); 1018 pcbddc->mat_graph->twodim = PETSC_FALSE; 1019 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1020 if (fl2g) { 1021 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 1022 PetscCall(PetscMalloc1(nee,&eedges)); 1023 for (i=0;i<nee;i++) { 1024 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 1025 } 1026 } else { 1027 eedges = alleedges; 1028 primals = allprimals; 1029 } 1030 PetscCall(PetscCalloc1(nee,&extcols)); 1031 1032 /* Mark again */ 1033 PetscCall(PetscArrayzero(marks,ne)); 1034 for (i=0;i<nee;i++) { 1035 PetscInt size,mark = i+1; 1036 1037 PetscCall(ISGetLocalSize(eedges[i],&size)); 1038 PetscCall(ISGetIndices(eedges[i],&idxs)); 1039 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1040 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1041 } 1042 if (print) { 1043 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass")); 1044 PetscCall(ISView(primals,NULL)); 1045 } 1046 1047 /* Recompute extended cols */ 1048 eerr = PETSC_FALSE; 1049 for (i=0;i<nee;i++) { 1050 PetscInt size; 1051 1052 cum = 0; 1053 PetscCall(ISGetLocalSize(eedges[i],&size)); 1054 if (!size && nedfieldlocal) continue; 1055 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 1056 PetscCall(ISGetIndices(eedges[i],&idxs)); 1057 for (j=0;j<size;j++) { 1058 PetscInt k,ee = idxs[j]; 1059 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1060 } 1061 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1062 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 1063 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 1064 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 1065 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 1066 if (cum != size -1) { 1067 if (print) { 1068 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass")); 1069 PetscCall(ISView(eedges[i],NULL)); 1070 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass")); 1071 PetscCall(ISView(extcols[i],NULL)); 1072 } 1073 eerr = PETSC_TRUE; 1074 } 1075 } 1076 } 1077 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1078 PetscCall(PetscFree2(extrow,gidxs)); 1079 PetscCall(PetscBTDestroy(&bter)); 1080 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF)); 1081 /* an error should not occur at this point */ 1082 PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1083 1084 /* Check the number of endpoints */ 1085 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1086 PetscCall(PetscMalloc1(2*nee,&corners)); 1087 PetscCall(PetscMalloc1(nee,&cedges)); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size, found = 0, gc[2]; 1090 1091 /* init with defaults */ 1092 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1093 PetscCall(ISGetLocalSize(eedges[i],&size)); 1094 if (!size && nedfieldlocal) continue; 1095 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 1096 PetscCall(ISGetIndices(eedges[i],&idxs)); 1097 PetscCall(PetscBTMemzero(nv,btvc)); 1098 for (j=0;j<size;j++) { 1099 PetscInt k,ee = idxs[j]; 1100 for (k=ii[ee];k<ii[ee+1];k++) { 1101 PetscInt vv = jj[k]; 1102 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1103 PetscCheck(found != 2,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %" PetscInt_FMT,i); 1104 corners[i*2+found++] = vv; 1105 } 1106 } 1107 } 1108 if (found != 2) { 1109 PetscInt e; 1110 if (fl2g) { 1111 PetscCall(ISLocalToGlobalMappingApply(fl2g,1,idxs,&e)); 1112 } else { 1113 e = idxs[0]; 1114 } 1115 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")",found,i,e,idxs[0]); 1116 } 1117 1118 /* get primal dof index on this coarse edge */ 1119 PetscCall(ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc)); 1120 if (gc[0] > gc[1]) { 1121 PetscInt swap = corners[2*i]; 1122 corners[2*i] = corners[2*i+1]; 1123 corners[2*i+1] = swap; 1124 } 1125 cedges[i] = idxs[size-1]; 1126 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1127 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1128 } 1129 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1130 PetscCall(PetscBTDestroy(&btvc)); 1131 1132 if (PetscDefined(USE_DEBUG)) { 1133 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1134 not interfere with neighbouring coarse edges */ 1135 PetscCall(PetscMalloc1(nee+1,&emarks)); 1136 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1137 for (i=0;i<nv;i++) { 1138 PetscInt emax = 0,eemax = 0; 1139 1140 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1141 PetscCall(PetscArrayzero(emarks,nee+1)); 1142 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1143 for (j=1;j<nee+1;j++) { 1144 if (emax < emarks[j]) { 1145 emax = emarks[j]; 1146 eemax = j; 1147 } 1148 } 1149 /* not relevant for edges */ 1150 if (!eemax) continue; 1151 1152 for (j=ii[i];j<ii[i+1];j++) { 1153 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1154 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT,marks[jj[j]]-1,eemax,i,jj[j]); 1155 } 1156 } 1157 } 1158 PetscCall(PetscFree(emarks)); 1159 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1160 } 1161 1162 /* Compute extended rows indices for edge blocks of the change of basis */ 1163 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1164 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt,&extmem)); 1165 extmem *= maxsize; 1166 PetscCall(PetscMalloc1(extmem*nee,&extrow)); 1167 PetscCall(PetscMalloc1(nee,&extrows)); 1168 PetscCall(PetscCalloc1(nee,&extrowcum)); 1169 for (i=0;i<nv;i++) { 1170 PetscInt mark = 0,size,start; 1171 1172 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1173 for (j=ii[i];j<ii[i+1];j++) 1174 if (marks[jj[j]] && !mark) 1175 mark = marks[jj[j]]; 1176 1177 /* not relevant */ 1178 if (!mark) continue; 1179 1180 /* import extended row */ 1181 mark--; 1182 start = mark*extmem+extrowcum[mark]; 1183 size = ii[i+1]-ii[i]; 1184 PetscCheck(extrowcum[mark] + size <= extmem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT,extrowcum[mark] + size,extmem); 1185 PetscCall(PetscArraycpy(extrow+start,jj+ii[i],size)); 1186 extrowcum[mark] += size; 1187 } 1188 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1189 PetscCall(MatDestroy(&lGt)); 1190 PetscCall(PetscFree(marks)); 1191 1192 /* Compress extrows */ 1193 cum = 0; 1194 for (i=0;i<nee;i++) { 1195 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1196 PetscCall(PetscSortRemoveDupsInt(&size,start)); 1197 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i])); 1198 cum = PetscMax(cum,size); 1199 } 1200 PetscCall(PetscFree(extrowcum)); 1201 PetscCall(PetscBTDestroy(&btv)); 1202 PetscCall(PetscBTDestroy(&btvcand)); 1203 1204 /* Workspace for lapack inner calls and VecSetValues */ 1205 PetscCall(PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork)); 1206 1207 /* Create change of basis matrix (preallocation can be improved) */ 1208 PetscCall(MatCreate(comm,&T)); 1209 PetscCall(MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,pc->pmat->rmap->N,pc->pmat->rmap->N)); 1210 PetscCall(MatSetType(T,MATAIJ)); 1211 PetscCall(MatSeqAIJSetPreallocation(T,10,NULL)); 1212 PetscCall(MatMPIAIJSetPreallocation(T,10,NULL,10,NULL)); 1213 PetscCall(MatSetLocalToGlobalMapping(T,al2g,al2g)); 1214 PetscCall(MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 1215 PetscCall(MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE)); 1216 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1217 1218 /* Defaults to identity */ 1219 PetscCall(MatCreateVecs(pc->pmat,&tvec,NULL)); 1220 PetscCall(VecSet(tvec,1.0)); 1221 PetscCall(MatDiagonalSet(T,tvec,INSERT_VALUES)); 1222 PetscCall(VecDestroy(&tvec)); 1223 1224 /* Create discrete gradient for the coarser level if needed */ 1225 PetscCall(MatDestroy(&pcbddc->nedcG)); 1226 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1227 if (pcbddc->current_level < pcbddc->max_levels) { 1228 ISLocalToGlobalMapping cel2g,cvl2g; 1229 IS wis,gwis; 1230 PetscInt cnv,cne; 1231 1232 PetscCall(ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis)); 1233 if (fl2g) { 1234 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal)); 1235 } else { 1236 PetscCall(PetscObjectReference((PetscObject)wis)); 1237 pcbddc->nedclocal = wis; 1238 } 1239 PetscCall(ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis)); 1240 PetscCall(ISDestroy(&wis)); 1241 PetscCall(ISRenumber(gwis,NULL,&cne,&wis)); 1242 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cel2g)); 1243 PetscCall(ISDestroy(&wis)); 1244 PetscCall(ISDestroy(&gwis)); 1245 1246 PetscCall(ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis)); 1247 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis)); 1248 PetscCall(ISDestroy(&wis)); 1249 PetscCall(ISRenumber(gwis,NULL,&cnv,&wis)); 1250 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cvl2g)); 1251 PetscCall(ISDestroy(&wis)); 1252 PetscCall(ISDestroy(&gwis)); 1253 1254 PetscCall(MatCreate(comm,&pcbddc->nedcG)); 1255 PetscCall(MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv)); 1256 PetscCall(MatSetType(pcbddc->nedcG,MATAIJ)); 1257 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL)); 1258 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL)); 1259 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g)); 1260 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1261 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1262 } 1263 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1264 1265 #if defined(PRINT_GDET) 1266 inc = 0; 1267 lev = pcbddc->current_level; 1268 #endif 1269 1270 /* Insert values in the change of basis matrix */ 1271 for (i=0;i<nee;i++) { 1272 Mat Gins = NULL, GKins = NULL; 1273 IS cornersis = NULL; 1274 PetscScalar cvals[2]; 1275 1276 if (pcbddc->nedcG) { 1277 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis)); 1278 } 1279 PetscCall(PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork)); 1280 if (Gins && GKins) { 1281 const PetscScalar *data; 1282 const PetscInt *rows,*cols; 1283 PetscInt nrh,nch,nrc,ncc; 1284 1285 PetscCall(ISGetIndices(eedges[i],&cols)); 1286 /* H1 */ 1287 PetscCall(ISGetIndices(extrows[i],&rows)); 1288 PetscCall(MatGetSize(Gins,&nrh,&nch)); 1289 PetscCall(MatDenseGetArrayRead(Gins,&data)); 1290 PetscCall(MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES)); 1291 PetscCall(MatDenseRestoreArrayRead(Gins,&data)); 1292 PetscCall(ISRestoreIndices(extrows[i],&rows)); 1293 /* complement */ 1294 PetscCall(MatGetSize(GKins,&nrc,&ncc)); 1295 PetscCheck(ncc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %" PetscInt_FMT,i); 1296 PetscCheck(ncc + nch == nrc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT,ncc,nch,nrc,i); 1297 PetscCheck(ncc == 1 || !pcbddc->nedcG,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT,i,ncc); 1298 PetscCall(MatDenseGetArrayRead(GKins,&data)); 1299 PetscCall(MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES)); 1300 PetscCall(MatDenseRestoreArrayRead(GKins,&data)); 1301 1302 /* coarse discrete gradient */ 1303 if (pcbddc->nedcG) { 1304 PetscInt cols[2]; 1305 1306 cols[0] = 2*i; 1307 cols[1] = 2*i+1; 1308 PetscCall(MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES)); 1309 } 1310 PetscCall(ISRestoreIndices(eedges[i],&cols)); 1311 } 1312 PetscCall(ISDestroy(&extrows[i])); 1313 PetscCall(ISDestroy(&extcols[i])); 1314 PetscCall(ISDestroy(&cornersis)); 1315 PetscCall(MatDestroy(&Gins)); 1316 PetscCall(MatDestroy(&GKins)); 1317 } 1318 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1319 1320 /* Start assembling */ 1321 PetscCall(MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY)); 1322 if (pcbddc->nedcG) { 1323 PetscCall(MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1324 } 1325 1326 /* Free */ 1327 if (fl2g) { 1328 PetscCall(ISDestroy(&primals)); 1329 for (i=0;i<nee;i++) { 1330 PetscCall(ISDestroy(&eedges[i])); 1331 } 1332 PetscCall(PetscFree(eedges)); 1333 } 1334 1335 /* hack mat_graph with primal dofs on the coarse edges */ 1336 { 1337 PCBDDCGraph graph = pcbddc->mat_graph; 1338 PetscInt *oqueue = graph->queue; 1339 PetscInt *ocptr = graph->cptr; 1340 PetscInt ncc,*idxs; 1341 1342 /* find first primal edge */ 1343 if (pcbddc->nedclocal) { 1344 PetscCall(ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1345 } else { 1346 if (fl2g) { 1347 PetscCall(ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges)); 1348 } 1349 idxs = cedges; 1350 } 1351 cum = 0; 1352 while (cum < nee && cedges[cum] < 0) cum++; 1353 1354 /* adapt connected components */ 1355 PetscCall(PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue)); 1356 graph->cptr[0] = 0; 1357 for (i=0,ncc=0;i<graph->ncc;i++) { 1358 PetscInt lc = ocptr[i+1]-ocptr[i]; 1359 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1360 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1361 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1362 ncc++; 1363 lc--; 1364 cum++; 1365 while (cum < nee && cedges[cum] < 0) cum++; 1366 } 1367 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1368 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1369 ncc++; 1370 } 1371 graph->ncc = ncc; 1372 if (pcbddc->nedclocal) { 1373 PetscCall(ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1374 } 1375 PetscCall(PetscFree2(ocptr,oqueue)); 1376 } 1377 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1378 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1379 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1380 PetscCall(MatDestroy(&conn)); 1381 1382 PetscCall(ISDestroy(&nedfieldlocal)); 1383 PetscCall(PetscFree(extrow)); 1384 PetscCall(PetscFree2(work,rwork)); 1385 PetscCall(PetscFree(corners)); 1386 PetscCall(PetscFree(cedges)); 1387 PetscCall(PetscFree(extrows)); 1388 PetscCall(PetscFree(extcols)); 1389 PetscCall(MatDestroy(&lG)); 1390 1391 /* Complete assembling */ 1392 PetscCall(MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY)); 1393 if (pcbddc->nedcG) { 1394 PetscCall(MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1395 #if 0 1396 PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1397 PetscCall(MatView(pcbddc->nedcG,NULL)); 1398 #endif 1399 } 1400 1401 /* set change of basis */ 1402 PetscCall(PCBDDCSetChangeOfBasisMat(pc,T,singular)); 1403 PetscCall(MatDestroy(&T)); 1404 1405 PetscFunctionReturn(0); 1406 } 1407 1408 /* the near-null space of BDDC carries information on quadrature weights, 1409 and these can be collinear -> so cheat with MatNullSpaceCreate 1410 and create a suitable set of basis vectors first */ 1411 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1412 { 1413 PetscInt i; 1414 1415 PetscFunctionBegin; 1416 for (i=0;i<nvecs;i++) { 1417 PetscInt first,last; 1418 1419 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1420 PetscCheck(last-first >= 2*nvecs || !has_const,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1421 if (i>=first && i < last) { 1422 PetscScalar *data; 1423 PetscCall(VecGetArray(quad_vecs[i],&data)); 1424 if (!has_const) { 1425 data[i-first] = 1.; 1426 } else { 1427 data[2*i-first] = 1./PetscSqrtReal(2.); 1428 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1429 } 1430 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1431 } 1432 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1433 } 1434 PetscCall(MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp)); 1435 for (i=0;i<nvecs;i++) { /* reset vectors */ 1436 PetscInt first,last; 1437 PetscCall(VecLockReadPop(quad_vecs[i])); 1438 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1439 if (i>=first && i < last) { 1440 PetscScalar *data; 1441 PetscCall(VecGetArray(quad_vecs[i],&data)); 1442 if (!has_const) { 1443 data[i-first] = 0.; 1444 } else { 1445 data[2*i-first] = 0.; 1446 data[2*i-first+1] = 0.; 1447 } 1448 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1449 } 1450 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1451 PetscCall(VecLockReadPush(quad_vecs[i])); 1452 } 1453 PetscFunctionReturn(0); 1454 } 1455 1456 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1457 { 1458 Mat loc_divudotp; 1459 Vec p,v,vins,quad_vec,*quad_vecs; 1460 ISLocalToGlobalMapping map; 1461 PetscScalar *vals; 1462 const PetscScalar *array; 1463 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1464 PetscInt n_neigh,*neigh,*n_shared,**shared; 1465 PetscMPIInt rank; 1466 1467 PetscFunctionBegin; 1468 PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1469 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1470 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A))); 1471 if (!maxneighs) { 1472 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1473 *nnsp = NULL; 1474 PetscFunctionReturn(0); 1475 } 1476 maxsize = 0; 1477 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1478 PetscCall(PetscMalloc2(maxsize,&gidxs,maxsize,&vals)); 1479 /* create vectors to hold quadrature weights */ 1480 PetscCall(MatCreateVecs(A,&quad_vec,NULL)); 1481 if (!transpose) { 1482 PetscCall(MatISGetLocalToGlobalMapping(A,&map,NULL)); 1483 } else { 1484 PetscCall(MatISGetLocalToGlobalMapping(A,NULL,&map)); 1485 } 1486 PetscCall(VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs)); 1487 PetscCall(VecDestroy(&quad_vec)); 1488 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp)); 1489 for (i=0;i<maxneighs;i++) { 1490 PetscCall(VecLockReadPop(quad_vecs[i])); 1491 } 1492 1493 /* compute local quad vec */ 1494 PetscCall(MatISGetLocalMat(divudotp,&loc_divudotp)); 1495 if (!transpose) { 1496 PetscCall(MatCreateVecs(loc_divudotp,&v,&p)); 1497 } else { 1498 PetscCall(MatCreateVecs(loc_divudotp,&p,&v)); 1499 } 1500 PetscCall(VecSet(p,1.)); 1501 if (!transpose) { 1502 PetscCall(MatMultTranspose(loc_divudotp,p,v)); 1503 } else { 1504 PetscCall(MatMult(loc_divudotp,p,v)); 1505 } 1506 if (vl2l) { 1507 Mat lA; 1508 VecScatter sc; 1509 1510 PetscCall(MatISGetLocalMat(A,&lA)); 1511 PetscCall(MatCreateVecs(lA,&vins,NULL)); 1512 PetscCall(VecScatterCreate(v,NULL,vins,vl2l,&sc)); 1513 PetscCall(VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1514 PetscCall(VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1515 PetscCall(VecScatterDestroy(&sc)); 1516 } else { 1517 vins = v; 1518 } 1519 PetscCall(VecGetArrayRead(vins,&array)); 1520 PetscCall(VecDestroy(&p)); 1521 1522 /* insert in global quadrature vecs */ 1523 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 1524 for (i=1;i<n_neigh;i++) { 1525 const PetscInt *idxs; 1526 PetscInt idx,nn,j; 1527 1528 idxs = shared[i]; 1529 nn = n_shared[i]; 1530 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1531 PetscCall(PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx)); 1532 idx = -(idx+1); 1533 PetscCheck(idx >= 0 && idx < maxneighs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")",idx,maxneighs); 1534 PetscCall(ISLocalToGlobalMappingApply(map,nn,idxs,gidxs)); 1535 PetscCall(VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES)); 1536 } 1537 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1538 PetscCall(VecRestoreArrayRead(vins,&array)); 1539 if (vl2l) { 1540 PetscCall(VecDestroy(&vins)); 1541 } 1542 PetscCall(VecDestroy(&v)); 1543 PetscCall(PetscFree2(gidxs,vals)); 1544 1545 /* assemble near null space */ 1546 for (i=0;i<maxneighs;i++) { 1547 PetscCall(VecAssemblyBegin(quad_vecs[i])); 1548 } 1549 for (i=0;i<maxneighs;i++) { 1550 PetscCall(VecAssemblyEnd(quad_vecs[i])); 1551 PetscCall(VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view")); 1552 PetscCall(VecLockReadPush(quad_vecs[i])); 1553 } 1554 PetscCall(VecDestroyVecs(maxneighs,&quad_vecs)); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1559 { 1560 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1561 1562 PetscFunctionBegin; 1563 if (primalv) { 1564 if (pcbddc->user_primal_vertices_local) { 1565 IS list[2], newp; 1566 1567 list[0] = primalv; 1568 list[1] = pcbddc->user_primal_vertices_local; 1569 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp)); 1570 PetscCall(ISSortRemoveDups(newp)); 1571 PetscCall(ISDestroy(&list[1])); 1572 pcbddc->user_primal_vertices_local = newp; 1573 } else { 1574 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primalv)); 1575 } 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1581 { 1582 PetscInt f, *comp = (PetscInt *)ctx; 1583 1584 PetscFunctionBegin; 1585 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1590 { 1591 Vec local,global; 1592 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1593 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1594 PetscBool monolithic = PETSC_FALSE; 1595 1596 PetscFunctionBegin; 1597 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC"); 1598 PetscCall(PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL)); 1599 PetscOptionsEnd(); 1600 /* need to convert from global to local topology information and remove references to information in global ordering */ 1601 PetscCall(MatCreateVecs(pc->pmat,&global,NULL)); 1602 PetscCall(MatCreateVecs(matis->A,&local,NULL)); 1603 PetscCall(VecBindToCPU(global,PETSC_TRUE)); 1604 PetscCall(VecBindToCPU(local,PETSC_TRUE)); 1605 if (monolithic) { /* just get block size to properly compute vertices */ 1606 if (pcbddc->vertex_size == 1) { 1607 PetscCall(MatGetBlockSize(pc->pmat,&pcbddc->vertex_size)); 1608 } 1609 goto boundary; 1610 } 1611 1612 if (pcbddc->user_provided_isfordofs) { 1613 if (pcbddc->n_ISForDofs) { 1614 PetscInt i; 1615 1616 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal)); 1617 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1618 PetscInt bs; 1619 1620 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i])); 1621 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i],&bs)); 1622 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1623 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1624 } 1625 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1626 pcbddc->n_ISForDofs = 0; 1627 PetscCall(PetscFree(pcbddc->ISForDofs)); 1628 } 1629 } else { 1630 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1631 DM dm; 1632 1633 PetscCall(MatGetDM(pc->pmat, &dm)); 1634 if (!dm) { 1635 PetscCall(PCGetDM(pc, &dm)); 1636 } 1637 if (dm) { 1638 IS *fields; 1639 PetscInt nf,i; 1640 1641 PetscCall(DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL)); 1642 PetscCall(PetscMalloc1(nf,&pcbddc->ISForDofsLocal)); 1643 for (i=0;i<nf;i++) { 1644 PetscInt bs; 1645 1646 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i])); 1647 PetscCall(ISGetBlockSize(fields[i],&bs)); 1648 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1649 PetscCall(ISDestroy(&fields[i])); 1650 } 1651 PetscCall(PetscFree(fields)); 1652 pcbddc->n_ISForDofsLocal = nf; 1653 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1654 PetscContainer c; 1655 1656 PetscCall(PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c)); 1657 if (c) { 1658 MatISLocalFields lf; 1659 PetscCall(PetscContainerGetPointer(c,(void**)&lf)); 1660 PetscCall(PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf)); 1661 } else { /* fallback, create the default fields if bs > 1 */ 1662 PetscInt i, n = matis->A->rmap->n; 1663 PetscCall(MatGetBlockSize(pc->pmat,&i)); 1664 if (i > 1) { 1665 pcbddc->n_ISForDofsLocal = i; 1666 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal)); 1667 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1668 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i])); 1669 } 1670 } 1671 } 1672 } 1673 } else { 1674 PetscInt i; 1675 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1676 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i])); 1677 } 1678 } 1679 } 1680 1681 boundary: 1682 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1683 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal)); 1684 } else if (pcbddc->DirichletBoundariesLocal) { 1685 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal)); 1686 } 1687 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1688 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal)); 1689 } else if (pcbddc->NeumannBoundariesLocal) { 1690 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal)); 1691 } 1692 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1693 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local)); 1694 } 1695 PetscCall(VecDestroy(&global)); 1696 PetscCall(VecDestroy(&local)); 1697 /* detect local disconnected subdomains if requested (use matis->A) */ 1698 if (pcbddc->detect_disconnected) { 1699 IS primalv = NULL; 1700 PetscInt i; 1701 PetscBool filter = pcbddc->detect_disconnected_filter; 1702 1703 for (i=0;i<pcbddc->n_local_subs;i++) { 1704 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1705 } 1706 PetscCall(PetscFree(pcbddc->local_subs)); 1707 PetscCall(PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv)); 1708 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,primalv)); 1709 PetscCall(ISDestroy(&primalv)); 1710 } 1711 /* early stage corner detection */ 1712 { 1713 DM dm; 1714 1715 PetscCall(MatGetDM(pc->pmat,&dm)); 1716 if (!dm) { 1717 PetscCall(PCGetDM(pc,&dm)); 1718 } 1719 if (dm) { 1720 PetscBool isda; 1721 1722 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda)); 1723 if (isda) { 1724 ISLocalToGlobalMapping l2l; 1725 IS corners; 1726 Mat lA; 1727 PetscBool gl,lo; 1728 1729 { 1730 Vec cvec; 1731 const PetscScalar *coords; 1732 PetscInt dof,n,cdim; 1733 PetscBool memc = PETSC_TRUE; 1734 1735 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1736 PetscCall(DMGetCoordinates(dm,&cvec)); 1737 PetscCall(VecGetLocalSize(cvec,&n)); 1738 PetscCall(VecGetBlockSize(cvec,&cdim)); 1739 n /= cdim; 1740 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1741 PetscCall(PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords)); 1742 PetscCall(VecGetArrayRead(cvec,&coords)); 1743 #if defined(PETSC_USE_COMPLEX) 1744 memc = PETSC_FALSE; 1745 #endif 1746 if (dof != 1) memc = PETSC_FALSE; 1747 if (memc) { 1748 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof)); 1749 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1750 PetscReal *bcoords = pcbddc->mat_graph->coords; 1751 PetscInt i, b, d; 1752 1753 for (i=0;i<n;i++) { 1754 for (b=0;b<dof;b++) { 1755 for (d=0;d<cdim;d++) { 1756 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1757 } 1758 } 1759 } 1760 } 1761 PetscCall(VecRestoreArrayRead(cvec,&coords)); 1762 pcbddc->mat_graph->cdim = cdim; 1763 pcbddc->mat_graph->cnloc = dof*n; 1764 pcbddc->mat_graph->cloc = PETSC_FALSE; 1765 } 1766 PetscCall(DMDAGetSubdomainCornersIS(dm,&corners)); 1767 PetscCall(MatISGetLocalMat(pc->pmat,&lA)); 1768 PetscCall(MatGetLocalToGlobalMapping(lA,&l2l,NULL)); 1769 PetscCall(MatISRestoreLocalMat(pc->pmat,&lA)); 1770 lo = (PetscBool)(l2l && corners); 1771 PetscCall(MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 1772 if (gl) { /* From PETSc's DMDA */ 1773 const PetscInt *idx; 1774 PetscInt dof,bs,*idxout,n; 1775 1776 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1777 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l,&bs)); 1778 PetscCall(ISGetLocalSize(corners,&n)); 1779 PetscCall(ISGetIndices(corners,&idx)); 1780 if (bs == dof) { 1781 PetscCall(PetscMalloc1(n,&idxout)); 1782 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout)); 1783 } else { /* the original DMDA local-to-local map have been modified */ 1784 PetscInt i,d; 1785 1786 PetscCall(PetscMalloc1(dof*n,&idxout)); 1787 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1788 PetscCall(ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout)); 1789 1790 bs = 1; 1791 n *= dof; 1792 } 1793 PetscCall(ISRestoreIndices(corners,&idx)); 1794 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1795 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners)); 1796 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,corners)); 1797 PetscCall(ISDestroy(&corners)); 1798 pcbddc->corner_selected = PETSC_TRUE; 1799 pcbddc->corner_selection = PETSC_TRUE; 1800 } 1801 if (corners) { 1802 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1803 } 1804 } 1805 } 1806 } 1807 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1808 DM dm; 1809 1810 PetscCall(MatGetDM(pc->pmat,&dm)); 1811 if (!dm) { 1812 PetscCall(PCGetDM(pc,&dm)); 1813 } 1814 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1815 Vec vcoords; 1816 PetscSection section; 1817 PetscReal *coords; 1818 PetscInt d,cdim,nl,nf,**ctxs; 1819 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1820 /* debug coordinates */ 1821 PetscViewer viewer; 1822 PetscBool flg; 1823 PetscViewerFormat format; 1824 const char *prefix; 1825 1826 PetscCall(DMGetCoordinateDim(dm,&cdim)); 1827 PetscCall(DMGetLocalSection(dm,§ion)); 1828 PetscCall(PetscSectionGetNumFields(section,&nf)); 1829 PetscCall(DMCreateGlobalVector(dm,&vcoords)); 1830 PetscCall(VecGetLocalSize(vcoords,&nl)); 1831 PetscCall(PetscMalloc1(nl*cdim,&coords)); 1832 PetscCall(PetscMalloc2(nf,&funcs,nf,&ctxs)); 1833 PetscCall(PetscMalloc1(nf,&ctxs[0])); 1834 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1835 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1836 1837 /* debug coordinates */ 1838 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 1839 PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords),((PetscObject)vcoords)->options,prefix,"-pc_bddc_coords_vec_view",&viewer,&format,&flg)); 1840 if (flg) PetscCall(PetscViewerPushFormat(viewer,format)); 1841 for (d=0;d<cdim;d++) { 1842 PetscInt i; 1843 const PetscScalar *v; 1844 char name[16]; 1845 1846 for (i=0;i<nf;i++) ctxs[i][0] = d; 1847 PetscCall(PetscSNPrintf(name,sizeof(name),"bddc_coords_%d",(int)d)); 1848 PetscCall(PetscObjectSetName((PetscObject)vcoords,name)); 1849 PetscCall(DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords)); 1850 if (flg) PetscCall(VecView(vcoords,viewer)); 1851 PetscCall(VecGetArrayRead(vcoords,&v)); 1852 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1853 PetscCall(VecRestoreArrayRead(vcoords,&v)); 1854 } 1855 PetscCall(VecDestroy(&vcoords)); 1856 PetscCall(PCSetCoordinates(pc,cdim,nl,coords)); 1857 PetscCall(PetscFree(coords)); 1858 PetscCall(PetscFree(ctxs[0])); 1859 PetscCall(PetscFree2(funcs,ctxs)); 1860 if (flg) { 1861 PetscCall(PetscViewerPopFormat(viewer)); 1862 PetscCall(PetscViewerDestroy(&viewer)); 1863 } 1864 } 1865 } 1866 PetscFunctionReturn(0); 1867 } 1868 1869 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1870 { 1871 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1872 IS nis; 1873 const PetscInt *idxs; 1874 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1875 1876 PetscFunctionBegin; 1877 PetscCheck(mop == MPI_LAND || mop == MPI_LOR,PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1878 if (mop == MPI_LAND) { 1879 /* init rootdata with true */ 1880 for (i=0;i<pc->pmat->rmap->n;i++) matis->sf_rootdata[i] = 1; 1881 } else { 1882 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 1883 } 1884 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 1885 PetscCall(ISGetLocalSize(*is,&nd)); 1886 PetscCall(ISGetIndices(*is,&idxs)); 1887 for (i=0;i<nd;i++) 1888 if (-1 < idxs[i] && idxs[i] < n) 1889 matis->sf_leafdata[idxs[i]] = 1; 1890 PetscCall(ISRestoreIndices(*is,&idxs)); 1891 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1892 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1893 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1894 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1895 if (mop == MPI_LAND) { 1896 PetscCall(PetscMalloc1(nd,&nidxs)); 1897 } else { 1898 PetscCall(PetscMalloc1(n,&nidxs)); 1899 } 1900 for (i=0,nnd=0;i<n;i++) 1901 if (matis->sf_leafdata[i]) 1902 nidxs[nnd++] = i; 1903 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis)); 1904 PetscCall(ISDestroy(is)); 1905 *is = nis; 1906 PetscFunctionReturn(0); 1907 } 1908 1909 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1910 { 1911 PC_IS *pcis = (PC_IS*)(pc->data); 1912 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1913 1914 PetscFunctionBegin; 1915 if (!pcbddc->benign_have_null) { 1916 PetscFunctionReturn(0); 1917 } 1918 if (pcbddc->ChangeOfBasisMatrix) { 1919 Vec swap; 1920 1921 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change)); 1922 swap = pcbddc->work_change; 1923 pcbddc->work_change = r; 1924 r = swap; 1925 } 1926 PetscCall(VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1927 PetscCall(VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1928 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1929 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D)); 1930 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1931 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 1932 PetscCall(VecSet(z,0.)); 1933 PetscCall(VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1934 PetscCall(VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1935 if (pcbddc->ChangeOfBasisMatrix) { 1936 pcbddc->work_change = r; 1937 PetscCall(VecCopy(z,pcbddc->work_change)); 1938 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z)); 1939 } 1940 PetscFunctionReturn(0); 1941 } 1942 1943 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1944 { 1945 PCBDDCBenignMatMult_ctx ctx; 1946 PetscBool apply_right,apply_left,reset_x; 1947 1948 PetscFunctionBegin; 1949 PetscCall(MatShellGetContext(A,&ctx)); 1950 if (transpose) { 1951 apply_right = ctx->apply_left; 1952 apply_left = ctx->apply_right; 1953 } else { 1954 apply_right = ctx->apply_right; 1955 apply_left = ctx->apply_left; 1956 } 1957 reset_x = PETSC_FALSE; 1958 if (apply_right) { 1959 const PetscScalar *ax; 1960 PetscInt nl,i; 1961 1962 PetscCall(VecGetLocalSize(x,&nl)); 1963 PetscCall(VecGetArrayRead(x,&ax)); 1964 PetscCall(PetscArraycpy(ctx->work,ax,nl)); 1965 PetscCall(VecRestoreArrayRead(x,&ax)); 1966 for (i=0;i<ctx->benign_n;i++) { 1967 PetscScalar sum,val; 1968 const PetscInt *idxs; 1969 PetscInt nz,j; 1970 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 1971 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1972 sum = 0.; 1973 if (ctx->apply_p0) { 1974 val = ctx->work[idxs[nz-1]]; 1975 for (j=0;j<nz-1;j++) { 1976 sum += ctx->work[idxs[j]]; 1977 ctx->work[idxs[j]] += val; 1978 } 1979 } else { 1980 for (j=0;j<nz-1;j++) { 1981 sum += ctx->work[idxs[j]]; 1982 } 1983 } 1984 ctx->work[idxs[nz-1]] -= sum; 1985 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1986 } 1987 PetscCall(VecPlaceArray(x,ctx->work)); 1988 reset_x = PETSC_TRUE; 1989 } 1990 if (transpose) { 1991 PetscCall(MatMultTranspose(ctx->A,x,y)); 1992 } else { 1993 PetscCall(MatMult(ctx->A,x,y)); 1994 } 1995 if (reset_x) { 1996 PetscCall(VecResetArray(x)); 1997 } 1998 if (apply_left) { 1999 PetscScalar *ay; 2000 PetscInt i; 2001 2002 PetscCall(VecGetArray(y,&ay)); 2003 for (i=0;i<ctx->benign_n;i++) { 2004 PetscScalar sum,val; 2005 const PetscInt *idxs; 2006 PetscInt nz,j; 2007 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 2008 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 2009 val = -ay[idxs[nz-1]]; 2010 if (ctx->apply_p0) { 2011 sum = 0.; 2012 for (j=0;j<nz-1;j++) { 2013 sum += ay[idxs[j]]; 2014 ay[idxs[j]] += val; 2015 } 2016 ay[idxs[nz-1]] += sum; 2017 } else { 2018 for (j=0;j<nz-1;j++) { 2019 ay[idxs[j]] += val; 2020 } 2021 ay[idxs[nz-1]] = 0.; 2022 } 2023 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 2024 } 2025 PetscCall(VecRestoreArray(y,&ay)); 2026 } 2027 PetscFunctionReturn(0); 2028 } 2029 2030 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2031 { 2032 PetscFunctionBegin; 2033 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE)); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2038 { 2039 PetscFunctionBegin; 2040 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE)); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2045 { 2046 PC_IS *pcis = (PC_IS*)pc->data; 2047 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2048 PCBDDCBenignMatMult_ctx ctx; 2049 2050 PetscFunctionBegin; 2051 if (!restore) { 2052 Mat A_IB,A_BI; 2053 PetscScalar *work; 2054 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2055 2056 PetscCheck(!pcbddc->benign_original_mat,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2057 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2058 PetscCall(PetscMalloc1(pcis->n,&work)); 2059 PetscCall(MatCreate(PETSC_COMM_SELF,&A_IB)); 2060 PetscCall(MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE)); 2061 PetscCall(MatSetType(A_IB,MATSHELL)); 2062 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private)); 2063 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2064 PetscCall(PetscNew(&ctx)); 2065 PetscCall(MatShellSetContext(A_IB,ctx)); 2066 ctx->apply_left = PETSC_TRUE; 2067 ctx->apply_right = PETSC_FALSE; 2068 ctx->apply_p0 = PETSC_FALSE; 2069 ctx->benign_n = pcbddc->benign_n; 2070 if (reuse) { 2071 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2072 ctx->free = PETSC_FALSE; 2073 } else { /* TODO: could be optimized for successive solves */ 2074 ISLocalToGlobalMapping N_to_D; 2075 PetscInt i; 2076 2077 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D)); 2078 PetscCall(PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs)); 2079 for (i=0;i<pcbddc->benign_n;i++) { 2080 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i])); 2081 } 2082 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2083 ctx->free = PETSC_TRUE; 2084 } 2085 ctx->A = pcis->A_IB; 2086 ctx->work = work; 2087 PetscCall(MatSetUp(A_IB)); 2088 PetscCall(MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY)); 2089 PetscCall(MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY)); 2090 pcis->A_IB = A_IB; 2091 2092 /* A_BI as A_IB^T */ 2093 PetscCall(MatCreateTranspose(A_IB,&A_BI)); 2094 pcbddc->benign_original_mat = pcis->A_BI; 2095 pcis->A_BI = A_BI; 2096 } else { 2097 if (!pcbddc->benign_original_mat) { 2098 PetscFunctionReturn(0); 2099 } 2100 PetscCall(MatShellGetContext(pcis->A_IB,&ctx)); 2101 PetscCall(MatDestroy(&pcis->A_IB)); 2102 pcis->A_IB = ctx->A; 2103 ctx->A = NULL; 2104 PetscCall(MatDestroy(&pcis->A_BI)); 2105 pcis->A_BI = pcbddc->benign_original_mat; 2106 pcbddc->benign_original_mat = NULL; 2107 if (ctx->free) { 2108 PetscInt i; 2109 for (i=0;i<ctx->benign_n;i++) { 2110 PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2111 } 2112 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2113 } 2114 PetscCall(PetscFree(ctx->work)); 2115 PetscCall(PetscFree(ctx)); 2116 } 2117 PetscFunctionReturn(0); 2118 } 2119 2120 /* used just in bddc debug mode */ 2121 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2122 { 2123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2124 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2125 Mat An; 2126 2127 PetscFunctionBegin; 2128 PetscCall(MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An)); 2129 PetscCall(MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL)); 2130 if (is1) { 2131 PetscCall(MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B)); 2132 PetscCall(MatDestroy(&An)); 2133 } else { 2134 *B = An; 2135 } 2136 PetscFunctionReturn(0); 2137 } 2138 2139 /* TODO: add reuse flag */ 2140 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2141 { 2142 Mat Bt; 2143 PetscScalar *a,*bdata; 2144 const PetscInt *ii,*ij; 2145 PetscInt m,n,i,nnz,*bii,*bij; 2146 PetscBool flg_row; 2147 2148 PetscFunctionBegin; 2149 PetscCall(MatGetSize(A,&n,&m)); 2150 PetscCall(MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2151 PetscCall(MatSeqAIJGetArray(A,&a)); 2152 nnz = n; 2153 for (i=0;i<ii[n];i++) { 2154 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2155 } 2156 PetscCall(PetscMalloc1(n+1,&bii)); 2157 PetscCall(PetscMalloc1(nnz,&bij)); 2158 PetscCall(PetscMalloc1(nnz,&bdata)); 2159 nnz = 0; 2160 bii[0] = 0; 2161 for (i=0;i<n;i++) { 2162 PetscInt j; 2163 for (j=ii[i];j<ii[i+1];j++) { 2164 PetscScalar entry = a[j]; 2165 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2166 bij[nnz] = ij[j]; 2167 bdata[nnz] = entry; 2168 nnz++; 2169 } 2170 } 2171 bii[i+1] = nnz; 2172 } 2173 PetscCall(MatSeqAIJRestoreArray(A,&a)); 2174 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt)); 2175 PetscCall(MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2176 { 2177 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2178 b->free_a = PETSC_TRUE; 2179 b->free_ij = PETSC_TRUE; 2180 } 2181 if (*B == A) { 2182 PetscCall(MatDestroy(&A)); 2183 } 2184 *B = Bt; 2185 PetscFunctionReturn(0); 2186 } 2187 2188 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2189 { 2190 Mat B = NULL; 2191 DM dm; 2192 IS is_dummy,*cc_n; 2193 ISLocalToGlobalMapping l2gmap_dummy; 2194 PCBDDCGraph graph; 2195 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2196 PetscInt i,n; 2197 PetscInt *xadj,*adjncy; 2198 PetscBool isplex = PETSC_FALSE; 2199 2200 PetscFunctionBegin; 2201 if (ncc) *ncc = 0; 2202 if (cc) *cc = NULL; 2203 if (primalv) *primalv = NULL; 2204 PetscCall(PCBDDCGraphCreate(&graph)); 2205 PetscCall(MatGetDM(pc->pmat,&dm)); 2206 if (!dm) { 2207 PetscCall(PCGetDM(pc,&dm)); 2208 } 2209 if (dm) { 2210 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex)); 2211 } 2212 if (filter) isplex = PETSC_FALSE; 2213 2214 if (isplex) { /* this code has been modified from plexpartition.c */ 2215 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2216 PetscInt *adj = NULL; 2217 IS cellNumbering; 2218 const PetscInt *cellNum; 2219 PetscBool useCone, useClosure; 2220 PetscSection section; 2221 PetscSegBuffer adjBuffer; 2222 PetscSF sfPoint; 2223 2224 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2225 PetscCall(DMGetPointSF(dm, &sfPoint)); 2226 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2227 /* Build adjacency graph via a section/segbuffer */ 2228 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion)); 2229 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2230 PetscCall(PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer)); 2231 /* Always use FVM adjacency to create partitioner graph */ 2232 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2233 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2234 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2235 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2236 for (n = 0, p = pStart; p < pEnd; p++) { 2237 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2238 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2239 adjSize = PETSC_DETERMINE; 2240 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2241 for (a = 0; a < adjSize; ++a) { 2242 const PetscInt point = adj[a]; 2243 if (pStart <= point && point < pEnd) { 2244 PetscInt *PETSC_RESTRICT pBuf; 2245 PetscCall(PetscSectionAddDof(section, p, 1)); 2246 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2247 *pBuf = point; 2248 } 2249 } 2250 n++; 2251 } 2252 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2253 /* Derive CSR graph from section/segbuffer */ 2254 PetscCall(PetscSectionSetUp(section)); 2255 PetscCall(PetscSectionGetStorageSize(section, &size)); 2256 PetscCall(PetscMalloc1(n+1, &xadj)); 2257 for (idx = 0, p = pStart; p < pEnd; p++) { 2258 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2259 PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++]))); 2260 } 2261 xadj[n] = size; 2262 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2263 /* Clean up */ 2264 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2265 PetscCall(PetscSectionDestroy(§ion)); 2266 PetscCall(PetscFree(adj)); 2267 graph->xadj = xadj; 2268 graph->adjncy = adjncy; 2269 } else { 2270 Mat A; 2271 PetscBool isseqaij, flg_row; 2272 2273 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2274 if (!A->rmap->N || !A->cmap->N) { 2275 PetscCall(PCBDDCGraphDestroy(&graph)); 2276 PetscFunctionReturn(0); 2277 } 2278 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij)); 2279 if (!isseqaij && filter) { 2280 PetscBool isseqdense; 2281 2282 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense)); 2283 if (!isseqdense) { 2284 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 2285 } else { /* TODO: rectangular case and LDA */ 2286 PetscScalar *array; 2287 PetscReal chop=1.e-6; 2288 2289 PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 2290 PetscCall(MatDenseGetArray(B,&array)); 2291 PetscCall(MatGetSize(B,&n,NULL)); 2292 for (i=0;i<n;i++) { 2293 PetscInt j; 2294 for (j=i+1;j<n;j++) { 2295 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2296 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2297 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2298 } 2299 } 2300 PetscCall(MatDenseRestoreArray(B,&array)); 2301 PetscCall(MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B)); 2302 } 2303 } else { 2304 PetscCall(PetscObjectReference((PetscObject)A)); 2305 B = A; 2306 } 2307 PetscCall(MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2308 2309 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2310 if (filter) { 2311 PetscScalar *data; 2312 PetscInt j,cum; 2313 2314 PetscCall(PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered)); 2315 PetscCall(MatSeqAIJGetArray(B,&data)); 2316 cum = 0; 2317 for (i=0;i<n;i++) { 2318 PetscInt t; 2319 2320 for (j=xadj[i];j<xadj[i+1];j++) { 2321 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2322 continue; 2323 } 2324 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2325 } 2326 t = xadj_filtered[i]; 2327 xadj_filtered[i] = cum; 2328 cum += t; 2329 } 2330 PetscCall(MatSeqAIJRestoreArray(B,&data)); 2331 graph->xadj = xadj_filtered; 2332 graph->adjncy = adjncy_filtered; 2333 } else { 2334 graph->xadj = xadj; 2335 graph->adjncy = adjncy; 2336 } 2337 } 2338 /* compute local connected components using PCBDDCGraph */ 2339 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy)); 2340 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy)); 2341 PetscCall(ISDestroy(&is_dummy)); 2342 PetscCall(PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT)); 2343 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2344 PetscCall(PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL)); 2345 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2346 2347 /* partial clean up */ 2348 PetscCall(PetscFree2(xadj_filtered,adjncy_filtered)); 2349 if (B) { 2350 PetscBool flg_row; 2351 PetscCall(MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2352 PetscCall(MatDestroy(&B)); 2353 } 2354 if (isplex) { 2355 PetscCall(PetscFree(xadj)); 2356 PetscCall(PetscFree(adjncy)); 2357 } 2358 2359 /* get back data */ 2360 if (isplex) { 2361 if (ncc) *ncc = graph->ncc; 2362 if (cc || primalv) { 2363 Mat A; 2364 PetscBT btv,btvt; 2365 PetscSection subSection; 2366 PetscInt *ids,cum,cump,*cids,*pids; 2367 2368 PetscCall(DMPlexGetSubdomainSection(dm,&subSection)); 2369 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2370 PetscCall(PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids)); 2371 PetscCall(PetscBTCreate(A->rmap->n,&btv)); 2372 PetscCall(PetscBTCreate(A->rmap->n,&btvt)); 2373 2374 cids[0] = 0; 2375 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2376 PetscInt j; 2377 2378 PetscCall(PetscBTMemzero(A->rmap->n,btvt)); 2379 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2380 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2381 2382 PetscCall(DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2383 for (k = 0; k < 2*size; k += 2) { 2384 PetscInt s, pp, p = closure[k], off, dof, cdof; 2385 2386 PetscCall(PetscSectionGetConstraintDof(subSection,p,&cdof)); 2387 PetscCall(PetscSectionGetOffset(subSection,p,&off)); 2388 PetscCall(PetscSectionGetDof(subSection,p,&dof)); 2389 for (s = 0; s < dof-cdof; s++) { 2390 if (PetscBTLookupSet(btvt,off+s)) continue; 2391 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2392 else pids[cump++] = off+s; /* cross-vertex */ 2393 } 2394 PetscCall(DMPlexGetTreeParent(dm,p,&pp,NULL)); 2395 if (pp != p) { 2396 PetscCall(PetscSectionGetConstraintDof(subSection,pp,&cdof)); 2397 PetscCall(PetscSectionGetOffset(subSection,pp,&off)); 2398 PetscCall(PetscSectionGetDof(subSection,pp,&dof)); 2399 for (s = 0; s < dof-cdof; s++) { 2400 if (PetscBTLookupSet(btvt,off+s)) continue; 2401 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2402 else pids[cump++] = off+s; /* cross-vertex */ 2403 } 2404 } 2405 } 2406 PetscCall(DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2407 } 2408 cids[i+1] = cum; 2409 /* mark dofs as already assigned */ 2410 for (j = cids[i]; j < cids[i+1]; j++) { 2411 PetscCall(PetscBTSet(btv,ids[j])); 2412 } 2413 } 2414 if (cc) { 2415 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2416 for (i = 0; i < graph->ncc; i++) { 2417 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i])); 2418 } 2419 *cc = cc_n; 2420 } 2421 if (primalv) { 2422 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv)); 2423 } 2424 PetscCall(PetscFree3(ids,cids,pids)); 2425 PetscCall(PetscBTDestroy(&btv)); 2426 PetscCall(PetscBTDestroy(&btvt)); 2427 } 2428 } else { 2429 if (ncc) *ncc = graph->ncc; 2430 if (cc) { 2431 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2432 for (i=0;i<graph->ncc;i++) { 2433 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i])); 2434 } 2435 *cc = cc_n; 2436 } 2437 } 2438 /* clean up graph */ 2439 graph->xadj = NULL; 2440 graph->adjncy = NULL; 2441 PetscCall(PCBDDCGraphDestroy(&graph)); 2442 PetscFunctionReturn(0); 2443 } 2444 2445 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2446 { 2447 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2448 PC_IS* pcis = (PC_IS*)(pc->data); 2449 IS dirIS = NULL; 2450 PetscInt i; 2451 2452 PetscFunctionBegin; 2453 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS)); 2454 if (zerodiag) { 2455 Mat A; 2456 Vec vec3_N; 2457 PetscScalar *vals; 2458 const PetscInt *idxs; 2459 PetscInt nz,*count; 2460 2461 /* p0 */ 2462 PetscCall(VecSet(pcis->vec1_N,0.)); 2463 PetscCall(PetscMalloc1(pcis->n,&vals)); 2464 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2465 PetscCall(ISGetIndices(zerodiag,&idxs)); 2466 for (i=0;i<nz;i++) vals[i] = 1.; 2467 PetscCall(VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES)); 2468 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2469 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2470 /* v_I */ 2471 PetscCall(VecSetRandom(pcis->vec2_N,NULL)); 2472 for (i=0;i<nz;i++) vals[i] = 0.; 2473 PetscCall(VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES)); 2474 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2475 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2476 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2477 PetscCall(VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES)); 2478 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2479 if (dirIS) { 2480 PetscInt n; 2481 2482 PetscCall(ISGetLocalSize(dirIS,&n)); 2483 PetscCall(ISGetIndices(dirIS,&idxs)); 2484 for (i=0;i<n;i++) vals[i] = 0.; 2485 PetscCall(VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES)); 2486 PetscCall(ISRestoreIndices(dirIS,&idxs)); 2487 } 2488 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2489 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2490 PetscCall(VecDuplicate(pcis->vec1_N,&vec3_N)); 2491 PetscCall(VecSet(vec3_N,0.)); 2492 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2493 PetscCall(MatMult(A,pcis->vec1_N,vec3_N)); 2494 PetscCall(VecDot(vec3_N,pcis->vec2_N,&vals[0])); 2495 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",(double)PetscAbsScalar(vals[0])); 2496 PetscCall(PetscFree(vals)); 2497 PetscCall(VecDestroy(&vec3_N)); 2498 2499 /* there should not be any pressure dofs lying on the interface */ 2500 PetscCall(PetscCalloc1(pcis->n,&count)); 2501 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2502 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2503 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2504 PetscCall(ISGetIndices(zerodiag,&idxs)); 2505 for (i=0;i<nz;i++) PetscCheck(!count[idxs[i]],PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof",idxs[i]); 2506 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2507 PetscCall(PetscFree(count)); 2508 } 2509 PetscCall(ISDestroy(&dirIS)); 2510 2511 /* check PCBDDCBenignGetOrSetP0 */ 2512 PetscCall(VecSetRandom(pcis->vec1_global,NULL)); 2513 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2514 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE)); 2515 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2516 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE)); 2517 for (i=0;i<pcbddc->benign_n;i++) { 2518 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2519 PetscCheck(val == -PetscGlobalRank-i,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g",(double)PetscRealPart(pcbddc->benign_p0[i]),i,(double)(-PetscGlobalRank-i)); 2520 } 2521 PetscFunctionReturn(0); 2522 } 2523 2524 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2525 { 2526 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2527 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2528 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2529 PetscInt nz,n,benign_n,bsp = 1; 2530 PetscInt *interior_dofs,n_interior_dofs,nneu; 2531 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2532 2533 PetscFunctionBegin; 2534 if (reuse) goto project_b0; 2535 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2536 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2537 for (n=0;n<pcbddc->benign_n;n++) { 2538 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2539 } 2540 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2541 has_null_pressures = PETSC_TRUE; 2542 have_null = PETSC_TRUE; 2543 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2544 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2545 Checks if all the pressure dofs in each subdomain have a zero diagonal 2546 If not, a change of basis on pressures is not needed 2547 since the local Schur complements are already SPD 2548 */ 2549 if (pcbddc->n_ISForDofsLocal) { 2550 IS iP = NULL; 2551 PetscInt p,*pp; 2552 PetscBool flg; 2553 2554 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp)); 2555 n = pcbddc->n_ISForDofsLocal; 2556 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC"); 2557 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg)); 2558 PetscOptionsEnd(); 2559 if (!flg) { 2560 n = 1; 2561 pp[0] = pcbddc->n_ISForDofsLocal-1; 2562 } 2563 2564 bsp = 0; 2565 for (p=0;p<n;p++) { 2566 PetscInt bs; 2567 2568 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal,PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %" PetscInt_FMT,pp[p]); 2569 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2570 bsp += bs; 2571 } 2572 PetscCall(PetscMalloc1(bsp,&bzerodiag)); 2573 bsp = 0; 2574 for (p=0;p<n;p++) { 2575 const PetscInt *idxs; 2576 PetscInt b,bs,npl,*bidxs; 2577 2578 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2579 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl)); 2580 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2581 PetscCall(PetscMalloc1(npl/bs,&bidxs)); 2582 for (b=0;b<bs;b++) { 2583 PetscInt i; 2584 2585 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2586 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp])); 2587 bsp++; 2588 } 2589 PetscCall(PetscFree(bidxs)); 2590 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2591 } 2592 PetscCall(ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures)); 2593 2594 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2595 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP)); 2596 if (iP) { 2597 IS newpressures; 2598 2599 PetscCall(ISDifference(pressures,iP,&newpressures)); 2600 PetscCall(ISDestroy(&pressures)); 2601 pressures = newpressures; 2602 } 2603 PetscCall(ISSorted(pressures,&sorted)); 2604 if (!sorted) { 2605 PetscCall(ISSort(pressures)); 2606 } 2607 PetscCall(PetscFree(pp)); 2608 } 2609 2610 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2611 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2612 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2613 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag)); 2614 PetscCall(ISSorted(zerodiag,&sorted)); 2615 if (!sorted) { 2616 PetscCall(ISSort(zerodiag)); 2617 } 2618 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2619 zerodiag_save = zerodiag; 2620 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2621 if (!nz) { 2622 if (n) have_null = PETSC_FALSE; 2623 has_null_pressures = PETSC_FALSE; 2624 PetscCall(ISDestroy(&zerodiag)); 2625 } 2626 recompute_zerodiag = PETSC_FALSE; 2627 2628 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2629 zerodiag_subs = NULL; 2630 benign_n = 0; 2631 n_interior_dofs = 0; 2632 interior_dofs = NULL; 2633 nneu = 0; 2634 if (pcbddc->NeumannBoundariesLocal) { 2635 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu)); 2636 } 2637 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2638 if (checkb) { /* need to compute interior nodes */ 2639 PetscInt n,i,j; 2640 PetscInt n_neigh,*neigh,*n_shared,**shared; 2641 PetscInt *iwork; 2642 2643 PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping,&n)); 2644 PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2645 PetscCall(PetscCalloc1(n,&iwork)); 2646 PetscCall(PetscMalloc1(n,&interior_dofs)); 2647 for (i=1;i<n_neigh;i++) 2648 for (j=0;j<n_shared[i];j++) 2649 iwork[shared[i][j]] += 1; 2650 for (i=0;i<n;i++) 2651 if (!iwork[i]) 2652 interior_dofs[n_interior_dofs++] = i; 2653 PetscCall(PetscFree(iwork)); 2654 PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2655 } 2656 if (has_null_pressures) { 2657 IS *subs; 2658 PetscInt nsubs,i,j,nl; 2659 const PetscInt *idxs; 2660 PetscScalar *array; 2661 Vec *work; 2662 2663 subs = pcbddc->local_subs; 2664 nsubs = pcbddc->n_local_subs; 2665 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2666 if (checkb) { 2667 PetscCall(VecDuplicateVecs(matis->y,2,&work)); 2668 PetscCall(ISGetLocalSize(zerodiag,&nl)); 2669 PetscCall(ISGetIndices(zerodiag,&idxs)); 2670 /* work[0] = 1_p */ 2671 PetscCall(VecSet(work[0],0.)); 2672 PetscCall(VecGetArray(work[0],&array)); 2673 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2674 PetscCall(VecRestoreArray(work[0],&array)); 2675 /* work[0] = 1_v */ 2676 PetscCall(VecSet(work[1],1.)); 2677 PetscCall(VecGetArray(work[1],&array)); 2678 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2679 PetscCall(VecRestoreArray(work[1],&array)); 2680 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2681 } 2682 2683 if (nsubs > 1 || bsp > 1) { 2684 IS *is; 2685 PetscInt b,totb; 2686 2687 totb = bsp; 2688 is = bsp > 1 ? bzerodiag : &zerodiag; 2689 nsubs = PetscMax(nsubs,1); 2690 PetscCall(PetscCalloc1(nsubs*totb,&zerodiag_subs)); 2691 for (b=0;b<totb;b++) { 2692 for (i=0;i<nsubs;i++) { 2693 ISLocalToGlobalMapping l2g; 2694 IS t_zerodiag_subs; 2695 PetscInt nl; 2696 2697 if (subs) { 2698 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i],&l2g)); 2699 } else { 2700 IS tis; 2701 2702 PetscCall(MatGetLocalSize(pcbddc->local_mat,&nl,NULL)); 2703 PetscCall(ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis)); 2704 PetscCall(ISLocalToGlobalMappingCreateIS(tis,&l2g)); 2705 PetscCall(ISDestroy(&tis)); 2706 } 2707 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs)); 2708 PetscCall(ISGetLocalSize(t_zerodiag_subs,&nl)); 2709 if (nl) { 2710 PetscBool valid = PETSC_TRUE; 2711 2712 if (checkb) { 2713 PetscCall(VecSet(matis->x,0)); 2714 PetscCall(ISGetLocalSize(subs[i],&nl)); 2715 PetscCall(ISGetIndices(subs[i],&idxs)); 2716 PetscCall(VecGetArray(matis->x,&array)); 2717 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2718 PetscCall(VecRestoreArray(matis->x,&array)); 2719 PetscCall(ISRestoreIndices(subs[i],&idxs)); 2720 PetscCall(VecPointwiseMult(matis->x,work[0],matis->x)); 2721 PetscCall(MatMult(matis->A,matis->x,matis->y)); 2722 PetscCall(VecPointwiseMult(matis->y,work[1],matis->y)); 2723 PetscCall(VecGetArray(matis->y,&array)); 2724 for (j=0;j<n_interior_dofs;j++) { 2725 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2726 valid = PETSC_FALSE; 2727 break; 2728 } 2729 } 2730 PetscCall(VecRestoreArray(matis->y,&array)); 2731 } 2732 if (valid && nneu) { 2733 const PetscInt *idxs; 2734 PetscInt nzb; 2735 2736 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2737 PetscCall(ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL)); 2738 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2739 if (nzb) valid = PETSC_FALSE; 2740 } 2741 if (valid && pressures) { 2742 IS t_pressure_subs,tmp; 2743 PetscInt i1,i2; 2744 2745 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs)); 2746 PetscCall(ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp)); 2747 PetscCall(ISGetLocalSize(tmp,&i1)); 2748 PetscCall(ISGetLocalSize(t_zerodiag_subs,&i2)); 2749 if (i2 != i1) valid = PETSC_FALSE; 2750 PetscCall(ISDestroy(&t_pressure_subs)); 2751 PetscCall(ISDestroy(&tmp)); 2752 } 2753 if (valid) { 2754 PetscCall(ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n])); 2755 benign_n++; 2756 } else recompute_zerodiag = PETSC_TRUE; 2757 } 2758 PetscCall(ISDestroy(&t_zerodiag_subs)); 2759 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2760 } 2761 } 2762 } else { /* there's just one subdomain (or zero if they have not been detected */ 2763 PetscBool valid = PETSC_TRUE; 2764 2765 if (nneu) valid = PETSC_FALSE; 2766 if (valid && pressures) { 2767 PetscCall(ISEqual(pressures,zerodiag,&valid)); 2768 } 2769 if (valid && checkb) { 2770 PetscCall(MatMult(matis->A,work[0],matis->x)); 2771 PetscCall(VecPointwiseMult(matis->x,work[1],matis->x)); 2772 PetscCall(VecGetArray(matis->x,&array)); 2773 for (j=0;j<n_interior_dofs;j++) { 2774 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2775 valid = PETSC_FALSE; 2776 break; 2777 } 2778 } 2779 PetscCall(VecRestoreArray(matis->x,&array)); 2780 } 2781 if (valid) { 2782 benign_n = 1; 2783 PetscCall(PetscMalloc1(benign_n,&zerodiag_subs)); 2784 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2785 zerodiag_subs[0] = zerodiag; 2786 } 2787 } 2788 if (checkb) { 2789 PetscCall(VecDestroyVecs(2,&work)); 2790 } 2791 } 2792 PetscCall(PetscFree(interior_dofs)); 2793 2794 if (!benign_n) { 2795 PetscInt n; 2796 2797 PetscCall(ISDestroy(&zerodiag)); 2798 recompute_zerodiag = PETSC_FALSE; 2799 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2800 if (n) have_null = PETSC_FALSE; 2801 } 2802 2803 /* final check for null pressures */ 2804 if (zerodiag && pressures) { 2805 PetscCall(ISEqual(pressures,zerodiag,&have_null)); 2806 } 2807 2808 if (recompute_zerodiag) { 2809 PetscCall(ISDestroy(&zerodiag)); 2810 if (benign_n == 1) { 2811 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2812 zerodiag = zerodiag_subs[0]; 2813 } else { 2814 PetscInt i,nzn,*new_idxs; 2815 2816 nzn = 0; 2817 for (i=0;i<benign_n;i++) { 2818 PetscInt ns; 2819 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2820 nzn += ns; 2821 } 2822 PetscCall(PetscMalloc1(nzn,&new_idxs)); 2823 nzn = 0; 2824 for (i=0;i<benign_n;i++) { 2825 PetscInt ns,*idxs; 2826 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2827 PetscCall(ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2828 PetscCall(PetscArraycpy(new_idxs+nzn,idxs,ns)); 2829 PetscCall(ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2830 nzn += ns; 2831 } 2832 PetscCall(PetscSortInt(nzn,new_idxs)); 2833 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag)); 2834 } 2835 have_null = PETSC_FALSE; 2836 } 2837 2838 /* determines if the coarse solver will be singular or not */ 2839 PetscCall(MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 2840 2841 /* Prepare matrix to compute no-net-flux */ 2842 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2843 Mat A,loc_divudotp; 2844 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2845 IS row,col,isused = NULL; 2846 PetscInt M,N,n,st,n_isused; 2847 2848 if (pressures) { 2849 isused = pressures; 2850 } else { 2851 isused = zerodiag_save; 2852 } 2853 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL)); 2854 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2855 PetscCall(MatGetLocalSize(A,&n,NULL)); 2856 PetscCheck(isused || (n == 0),PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2857 n_isused = 0; 2858 if (isused) { 2859 PetscCall(ISGetLocalSize(isused,&n_isused)); 2860 } 2861 PetscCallMPI(MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 2862 st = st-n_isused; 2863 if (n) { 2864 const PetscInt *gidxs; 2865 2866 PetscCall(MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp)); 2867 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 2868 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2869 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2870 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col)); 2871 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 2872 } else { 2873 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp)); 2874 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2875 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col)); 2876 } 2877 PetscCall(MatGetSize(pc->pmat,NULL,&N)); 2878 PetscCall(ISGetSize(row,&M)); 2879 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 2880 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 2881 PetscCall(ISDestroy(&row)); 2882 PetscCall(ISDestroy(&col)); 2883 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp)); 2884 PetscCall(MatSetType(pcbddc->divudotp,MATIS)); 2885 PetscCall(MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N)); 2886 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g)); 2887 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2888 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2889 PetscCall(MatISSetLocalMat(pcbddc->divudotp,loc_divudotp)); 2890 PetscCall(MatDestroy(&loc_divudotp)); 2891 PetscCall(MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2892 PetscCall(MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2893 } 2894 PetscCall(ISDestroy(&zerodiag_save)); 2895 PetscCall(ISDestroy(&pressures)); 2896 if (bzerodiag) { 2897 PetscInt i; 2898 2899 for (i=0;i<bsp;i++) { 2900 PetscCall(ISDestroy(&bzerodiag[i])); 2901 } 2902 PetscCall(PetscFree(bzerodiag)); 2903 } 2904 pcbddc->benign_n = benign_n; 2905 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2906 2907 /* determines if the problem has subdomains with 0 pressure block */ 2908 have_null = (PetscBool)(!!pcbddc->benign_n); 2909 PetscCall(MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 2910 2911 project_b0: 2912 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2913 /* change of basis and p0 dofs */ 2914 if (pcbddc->benign_n) { 2915 PetscInt i,s,*nnz; 2916 2917 /* local change of basis for pressures */ 2918 PetscCall(MatDestroy(&pcbddc->benign_change)); 2919 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change)); 2920 PetscCall(MatSetType(pcbddc->benign_change,MATAIJ)); 2921 PetscCall(MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE)); 2922 PetscCall(PetscMalloc1(n,&nnz)); 2923 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2924 for (i=0;i<pcbddc->benign_n;i++) { 2925 const PetscInt *idxs; 2926 PetscInt nzs,j; 2927 2928 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs)); 2929 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2930 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2931 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2932 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2933 } 2934 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz)); 2935 PetscCall(MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 2936 PetscCall(PetscFree(nnz)); 2937 /* set identity by default */ 2938 for (i=0;i<n;i++) { 2939 PetscCall(MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES)); 2940 } 2941 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 2942 PetscCall(PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0)); 2943 /* set change on pressures */ 2944 for (s=0;s<pcbddc->benign_n;s++) { 2945 PetscScalar *array; 2946 const PetscInt *idxs; 2947 PetscInt nzs; 2948 2949 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs)); 2950 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2951 for (i=0;i<nzs-1;i++) { 2952 PetscScalar vals[2]; 2953 PetscInt cols[2]; 2954 2955 cols[0] = idxs[i]; 2956 cols[1] = idxs[nzs-1]; 2957 vals[0] = 1.; 2958 vals[1] = 1.; 2959 PetscCall(MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES)); 2960 } 2961 PetscCall(PetscMalloc1(nzs,&array)); 2962 for (i=0;i<nzs-1;i++) array[i] = -1.; 2963 array[nzs-1] = 1.; 2964 PetscCall(MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES)); 2965 /* store local idxs for p0 */ 2966 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2967 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2968 PetscCall(PetscFree(array)); 2969 } 2970 PetscCall(MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2971 PetscCall(MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2972 2973 /* project if needed */ 2974 if (pcbddc->benign_change_explicit) { 2975 Mat M; 2976 2977 PetscCall(MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M)); 2978 PetscCall(MatDestroy(&pcbddc->local_mat)); 2979 PetscCall(MatSeqAIJCompress(M,&pcbddc->local_mat)); 2980 PetscCall(MatDestroy(&M)); 2981 } 2982 /* store global idxs for p0 */ 2983 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx)); 2984 } 2985 *zerodiaglocal = zerodiag; 2986 PetscFunctionReturn(0); 2987 } 2988 2989 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2990 { 2991 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2992 PetscScalar *array; 2993 2994 PetscFunctionBegin; 2995 if (!pcbddc->benign_sf) { 2996 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf)); 2997 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx)); 2998 } 2999 if (get) { 3000 PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array)); 3001 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 3002 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 3003 PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 3004 } else { 3005 PetscCall(VecGetArray(v,&array)); 3006 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 3007 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 3008 PetscCall(VecRestoreArray(v,&array)); 3009 } 3010 PetscFunctionReturn(0); 3011 } 3012 3013 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3014 { 3015 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3016 3017 PetscFunctionBegin; 3018 /* TODO: add error checking 3019 - avoid nested pop (or push) calls. 3020 - cannot push before pop. 3021 - cannot call this if pcbddc->local_mat is NULL 3022 */ 3023 if (!pcbddc->benign_n) { 3024 PetscFunctionReturn(0); 3025 } 3026 if (pop) { 3027 if (pcbddc->benign_change_explicit) { 3028 IS is_p0; 3029 MatReuse reuse; 3030 3031 /* extract B_0 */ 3032 reuse = MAT_INITIAL_MATRIX; 3033 if (pcbddc->benign_B0) { 3034 reuse = MAT_REUSE_MATRIX; 3035 } 3036 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0)); 3037 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0)); 3038 /* remove rows and cols from local problem */ 3039 PetscCall(MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE)); 3040 PetscCall(MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 3041 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL)); 3042 PetscCall(ISDestroy(&is_p0)); 3043 } else { 3044 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3045 PetscScalar *vals; 3046 PetscInt i,n,*idxs_ins; 3047 3048 PetscCall(VecGetLocalSize(matis->y,&n)); 3049 PetscCall(PetscMalloc2(n,&idxs_ins,n,&vals)); 3050 if (!pcbddc->benign_B0) { 3051 PetscInt *nnz; 3052 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0)); 3053 PetscCall(MatSetType(pcbddc->benign_B0,MATAIJ)); 3054 PetscCall(MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE)); 3055 PetscCall(PetscMalloc1(pcbddc->benign_n,&nnz)); 3056 for (i=0;i<pcbddc->benign_n;i++) { 3057 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i])); 3058 nnz[i] = n - nnz[i]; 3059 } 3060 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz)); 3061 PetscCall(MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 3062 PetscCall(PetscFree(nnz)); 3063 } 3064 3065 for (i=0;i<pcbddc->benign_n;i++) { 3066 PetscScalar *array; 3067 PetscInt *idxs,j,nz,cum; 3068 3069 PetscCall(VecSet(matis->x,0.)); 3070 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz)); 3071 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3072 for (j=0;j<nz;j++) vals[j] = 1.; 3073 PetscCall(VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES)); 3074 PetscCall(VecAssemblyBegin(matis->x)); 3075 PetscCall(VecAssemblyEnd(matis->x)); 3076 PetscCall(VecSet(matis->y,0.)); 3077 PetscCall(MatMult(matis->A,matis->x,matis->y)); 3078 PetscCall(VecGetArray(matis->y,&array)); 3079 cum = 0; 3080 for (j=0;j<n;j++) { 3081 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3082 vals[cum] = array[j]; 3083 idxs_ins[cum] = j; 3084 cum++; 3085 } 3086 } 3087 PetscCall(MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES)); 3088 PetscCall(VecRestoreArray(matis->y,&array)); 3089 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3090 } 3091 PetscCall(MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3092 PetscCall(MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3093 PetscCall(PetscFree2(idxs_ins,vals)); 3094 } 3095 } else { /* push */ 3096 if (pcbddc->benign_change_explicit) { 3097 PetscInt i; 3098 3099 for (i=0;i<pcbddc->benign_n;i++) { 3100 PetscScalar *B0_vals; 3101 PetscInt *B0_cols,B0_ncol; 3102 3103 PetscCall(MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3104 PetscCall(MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES)); 3105 PetscCall(MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES)); 3106 PetscCall(MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES)); 3107 PetscCall(MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3108 } 3109 PetscCall(MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3110 PetscCall(MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3111 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3112 } 3113 PetscFunctionReturn(0); 3114 } 3115 3116 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3117 { 3118 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3119 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3120 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3121 PetscBLASInt *B_iwork,*B_ifail; 3122 PetscScalar *work,lwork; 3123 PetscScalar *St,*S,*eigv; 3124 PetscScalar *Sarray,*Starray; 3125 PetscReal *eigs,thresh,lthresh,uthresh; 3126 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3127 PetscBool allocated_S_St; 3128 #if defined(PETSC_USE_COMPLEX) 3129 PetscReal *rwork; 3130 #endif 3131 3132 PetscFunctionBegin; 3133 PetscCheck(sub_schurs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3134 PetscCheck(sub_schurs->schur_explicit,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3135 PetscCheck(!sub_schurs->n_subs || !(!sub_schurs->is_symmetric),PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3136 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3137 3138 if (pcbddc->dbg_flag) { 3139 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3140 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 3141 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n")); 3142 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3143 } 3144 3145 if (pcbddc->dbg_flag) { 3146 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef)); 3147 } 3148 3149 /* max size of subsets */ 3150 mss = 0; 3151 for (i=0;i<sub_schurs->n_subs;i++) { 3152 PetscInt subset_size; 3153 3154 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3155 mss = PetscMax(mss,subset_size); 3156 } 3157 3158 /* min/max and threshold */ 3159 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3160 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3161 nmax = PetscMax(nmin,nmax); 3162 allocated_S_St = PETSC_FALSE; 3163 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3164 allocated_S_St = PETSC_TRUE; 3165 } 3166 3167 /* allocate lapack workspace */ 3168 cum = cum2 = 0; 3169 maxneigs = 0; 3170 for (i=0;i<sub_schurs->n_subs;i++) { 3171 PetscInt n,subset_size; 3172 3173 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3174 n = PetscMin(subset_size,nmax); 3175 cum += subset_size; 3176 cum2 += subset_size*n; 3177 maxneigs = PetscMax(maxneigs,n); 3178 } 3179 lwork = 0; 3180 if (mss) { 3181 if (sub_schurs->is_symmetric) { 3182 PetscScalar sdummy = 0.; 3183 PetscBLASInt B_itype = 1; 3184 PetscBLASInt B_N = mss, idummy = 0; 3185 PetscReal rdummy = 0.,zero = 0.0; 3186 PetscReal eps = 0.0; /* dlamch? */ 3187 3188 B_lwork = -1; 3189 /* some implementations may complain about NULL pointers, even if we are querying */ 3190 S = &sdummy; 3191 St = &sdummy; 3192 eigs = &rdummy; 3193 eigv = &sdummy; 3194 B_iwork = &idummy; 3195 B_ifail = &idummy; 3196 #if defined(PETSC_USE_COMPLEX) 3197 rwork = &rdummy; 3198 #endif 3199 thresh = 1.0; 3200 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3201 #if defined(PETSC_USE_COMPLEX) 3202 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3203 #else 3204 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3205 #endif 3206 PetscCheck(B_ierr == 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3207 PetscCall(PetscFPTrapPop()); 3208 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3209 } 3210 3211 nv = 0; 3212 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3213 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&nv)); 3214 } 3215 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork)); 3216 if (allocated_S_St) { 3217 PetscCall(PetscMalloc2(mss*mss,&S,mss*mss,&St)); 3218 } 3219 PetscCall(PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail)); 3220 #if defined(PETSC_USE_COMPLEX) 3221 PetscCall(PetscMalloc1(7*mss,&rwork)); 3222 #endif 3223 PetscCall(PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3224 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3225 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3226 nv+cum,&pcbddc->adaptive_constraints_idxs, 3227 nv+cum2,&pcbddc->adaptive_constraints_data)); 3228 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs)); 3229 3230 maxneigs = 0; 3231 cum = cumarray = 0; 3232 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3233 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3234 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3235 const PetscInt *idxs; 3236 3237 PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs)); 3238 for (cum=0;cum<nv;cum++) { 3239 pcbddc->adaptive_constraints_n[cum] = 1; 3240 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3241 pcbddc->adaptive_constraints_data[cum] = 1.0; 3242 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3243 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3244 } 3245 PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs)); 3246 } 3247 3248 if (mss) { /* multilevel */ 3249 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3250 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3251 } 3252 3253 lthresh = pcbddc->adaptive_threshold[0]; 3254 uthresh = pcbddc->adaptive_threshold[1]; 3255 for (i=0;i<sub_schurs->n_subs;i++) { 3256 const PetscInt *idxs; 3257 PetscReal upper,lower; 3258 PetscInt j,subset_size,eigs_start = 0; 3259 PetscBLASInt B_N; 3260 PetscBool same_data = PETSC_FALSE; 3261 PetscBool scal = PETSC_FALSE; 3262 3263 if (pcbddc->use_deluxe_scaling) { 3264 upper = PETSC_MAX_REAL; 3265 lower = uthresh; 3266 } else { 3267 PetscCheck(sub_schurs->is_posdef,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3268 upper = 1./uthresh; 3269 lower = 0.; 3270 } 3271 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3272 PetscCall(ISGetIndices(sub_schurs->is_subs[i],&idxs)); 3273 PetscCall(PetscBLASIntCast(subset_size,&B_N)); 3274 /* this is experimental: we assume the dofs have been properly grouped to have 3275 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3276 if (!sub_schurs->is_posdef) { 3277 Mat T; 3278 3279 for (j=0;j<subset_size;j++) { 3280 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3281 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T)); 3282 PetscCall(MatScale(T,-1.0)); 3283 PetscCall(MatDestroy(&T)); 3284 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T)); 3285 PetscCall(MatScale(T,-1.0)); 3286 PetscCall(MatDestroy(&T)); 3287 if (sub_schurs->change_primal_sub) { 3288 PetscInt nz,k; 3289 const PetscInt *idxs; 3290 3291 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz)); 3292 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i],&idxs)); 3293 for (k=0;k<nz;k++) { 3294 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3295 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3296 } 3297 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs)); 3298 } 3299 scal = PETSC_TRUE; 3300 break; 3301 } 3302 } 3303 } 3304 3305 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3306 if (sub_schurs->is_symmetric) { 3307 PetscInt j,k; 3308 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3309 PetscCall(PetscArrayzero(S,subset_size*subset_size)); 3310 PetscCall(PetscArrayzero(St,subset_size*subset_size)); 3311 } 3312 for (j=0;j<subset_size;j++) { 3313 for (k=j;k<subset_size;k++) { 3314 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3315 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3316 } 3317 } 3318 } else { 3319 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3320 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3321 } 3322 } else { 3323 S = Sarray + cumarray; 3324 St = Starray + cumarray; 3325 } 3326 /* see if we can save some work */ 3327 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3328 PetscCall(PetscArraycmp(S,St,subset_size*subset_size,&same_data)); 3329 } 3330 3331 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3332 B_neigs = 0; 3333 } else { 3334 if (sub_schurs->is_symmetric) { 3335 PetscBLASInt B_itype = 1; 3336 PetscBLASInt B_IL, B_IU; 3337 PetscReal eps = -1.0; /* dlamch? */ 3338 PetscInt nmin_s; 3339 PetscBool compute_range; 3340 3341 B_neigs = 0; 3342 compute_range = (PetscBool)!same_data; 3343 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3344 3345 if (pcbddc->dbg_flag) { 3346 PetscInt nc = 0; 3347 3348 if (sub_schurs->change_primal_sub) { 3349 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc)); 3350 } 3351 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc)); 3352 } 3353 3354 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3355 if (compute_range) { 3356 3357 /* ask for eigenvalues larger than thresh */ 3358 if (sub_schurs->is_posdef) { 3359 #if defined(PETSC_USE_COMPLEX) 3360 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3361 #else 3362 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3363 #endif 3364 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3365 } else { /* no theory so far, but it works nicely */ 3366 PetscInt recipe = 0,recipe_m = 1; 3367 PetscReal bb[2]; 3368 3369 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL)); 3370 switch (recipe) { 3371 case 0: 3372 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3373 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3374 #if defined(PETSC_USE_COMPLEX) 3375 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3376 #else 3377 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3378 #endif 3379 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3380 break; 3381 case 1: 3382 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3383 #if defined(PETSC_USE_COMPLEX) 3384 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3385 #else 3386 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3387 #endif 3388 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3389 if (!scal) { 3390 PetscBLASInt B_neigs2 = 0; 3391 3392 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3393 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3394 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3395 #if defined(PETSC_USE_COMPLEX) 3396 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3397 #else 3398 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3399 #endif 3400 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3401 B_neigs += B_neigs2; 3402 } 3403 break; 3404 case 2: 3405 if (scal) { 3406 bb[0] = PETSC_MIN_REAL; 3407 bb[1] = 0; 3408 #if defined(PETSC_USE_COMPLEX) 3409 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3410 #else 3411 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3412 #endif 3413 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3414 } else { 3415 PetscBLASInt B_neigs2 = 0; 3416 PetscBool import = PETSC_FALSE; 3417 3418 lthresh = PetscMax(lthresh,0.0); 3419 if (lthresh > 0.0) { 3420 bb[0] = PETSC_MIN_REAL; 3421 bb[1] = lthresh*lthresh; 3422 3423 import = PETSC_TRUE; 3424 #if defined(PETSC_USE_COMPLEX) 3425 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3426 #else 3427 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3428 #endif 3429 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3430 } 3431 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3432 bb[1] = PETSC_MAX_REAL; 3433 if (import) { 3434 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3435 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3436 } 3437 #if defined(PETSC_USE_COMPLEX) 3438 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3439 #else 3440 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3441 #endif 3442 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3443 B_neigs += B_neigs2; 3444 } 3445 break; 3446 case 3: 3447 if (scal) { 3448 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL)); 3449 } else { 3450 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL)); 3451 } 3452 if (!scal) { 3453 bb[0] = uthresh; 3454 bb[1] = PETSC_MAX_REAL; 3455 #if defined(PETSC_USE_COMPLEX) 3456 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3457 #else 3458 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3459 #endif 3460 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3461 } 3462 if (recipe_m > 0 && B_N - B_neigs > 0) { 3463 PetscBLASInt B_neigs2 = 0; 3464 3465 B_IL = 1; 3466 PetscCall(PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU)); 3467 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3468 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3469 #if defined(PETSC_USE_COMPLEX) 3470 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3471 #else 3472 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3473 #endif 3474 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3475 B_neigs += B_neigs2; 3476 } 3477 break; 3478 case 4: 3479 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3480 #if defined(PETSC_USE_COMPLEX) 3481 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3482 #else 3483 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3484 #endif 3485 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3486 { 3487 PetscBLASInt B_neigs2 = 0; 3488 3489 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3490 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3491 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3492 #if defined(PETSC_USE_COMPLEX) 3493 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3494 #else 3495 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3496 #endif 3497 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3498 B_neigs += B_neigs2; 3499 } 3500 break; 3501 case 5: /* same as before: first compute all eigenvalues, then filter */ 3502 #if defined(PETSC_USE_COMPLEX) 3503 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3504 #else 3505 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3506 #endif 3507 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3508 { 3509 PetscInt e,k,ne; 3510 for (e=0,ne=0;e<B_neigs;e++) { 3511 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3512 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3513 eigs[ne] = eigs[e]; 3514 ne++; 3515 } 3516 } 3517 PetscCall(PetscArraycpy(eigv,S,B_N*ne)); 3518 B_neigs = ne; 3519 } 3520 break; 3521 default: 3522 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %" PetscInt_FMT,recipe); 3523 } 3524 } 3525 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3526 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3527 B_IL = 1; 3528 #if defined(PETSC_USE_COMPLEX) 3529 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3530 #else 3531 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3532 #endif 3533 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3534 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3535 PetscInt k; 3536 PetscCheck(sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3537 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax)); 3538 PetscCall(PetscBLASIntCast(nmax,&B_neigs)); 3539 nmin = nmax; 3540 PetscCall(PetscArrayzero(eigv,subset_size*nmax)); 3541 for (k=0;k<nmax;k++) { 3542 eigs[k] = 1./PETSC_SMALL; 3543 eigv[k*(subset_size+1)] = 1.0; 3544 } 3545 } 3546 PetscCall(PetscFPTrapPop()); 3547 if (B_ierr) { 3548 PetscCheck(B_ierr >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT,-B_ierr); 3549 PetscCheck(B_ierr > B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge",B_ierr); 3550 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite",B_ierr-B_N-1); 3551 } 3552 3553 if (B_neigs > nmax) { 3554 if (pcbddc->dbg_flag) { 3555 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n",B_neigs,nmax)); 3556 } 3557 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3558 B_neigs = nmax; 3559 } 3560 3561 nmin_s = PetscMin(nmin,B_N); 3562 if (B_neigs < nmin_s) { 3563 PetscBLASInt B_neigs2 = 0; 3564 3565 if (pcbddc->use_deluxe_scaling) { 3566 if (scal) { 3567 B_IU = nmin_s; 3568 B_IL = B_neigs + 1; 3569 } else { 3570 B_IL = B_N - nmin_s + 1; 3571 B_IU = B_N - B_neigs; 3572 } 3573 } else { 3574 B_IL = B_neigs + 1; 3575 B_IU = nmin_s; 3576 } 3577 if (pcbddc->dbg_flag) { 3578 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU)); 3579 } 3580 if (sub_schurs->is_symmetric) { 3581 PetscInt j,k; 3582 for (j=0;j<subset_size;j++) { 3583 for (k=j;k<subset_size;k++) { 3584 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3585 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3586 } 3587 } 3588 } else { 3589 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3590 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3591 } 3592 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3593 #if defined(PETSC_USE_COMPLEX) 3594 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3595 #else 3596 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3597 #endif 3598 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3599 PetscCall(PetscFPTrapPop()); 3600 B_neigs += B_neigs2; 3601 } 3602 if (B_ierr) { 3603 PetscCheck(B_ierr >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT,-B_ierr); 3604 PetscCheck(B_ierr > B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge",B_ierr); 3605 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite",B_ierr-B_N-1); 3606 } 3607 if (pcbddc->dbg_flag) { 3608 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %" PetscBLASInt_FMT " eigs\n",B_neigs)); 3609 for (j=0;j<B_neigs;j++) { 3610 if (eigs[j] == 0.0) { 3611 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n")); 3612 } else { 3613 if (pcbddc->use_deluxe_scaling) { 3614 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",(double)eigs[j+eigs_start])); 3615 } else { 3616 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",(double)(1./eigs[j+eigs_start]))); 3617 } 3618 } 3619 } 3620 } 3621 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3622 } 3623 /* change the basis back to the original one */ 3624 if (sub_schurs->change) { 3625 Mat change,phi,phit; 3626 3627 if (pcbddc->dbg_flag > 2) { 3628 PetscInt ii; 3629 for (ii=0;ii<B_neigs;ii++) { 3630 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n",ii,B_neigs,B_N)); 3631 for (j=0;j<B_N;j++) { 3632 #if defined(PETSC_USE_COMPLEX) 3633 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3634 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3635 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",(double)r,(double)c)); 3636 #else 3637 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",(double)(eigv[(ii+eigs_start)*subset_size+j]))); 3638 #endif 3639 } 3640 } 3641 } 3642 PetscCall(KSPGetOperators(sub_schurs->change[i],&change,NULL)); 3643 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit)); 3644 PetscCall(MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi)); 3645 PetscCall(MatCopy(phi,phit,SAME_NONZERO_PATTERN)); 3646 PetscCall(MatDestroy(&phit)); 3647 PetscCall(MatDestroy(&phi)); 3648 } 3649 maxneigs = PetscMax(B_neigs,maxneigs); 3650 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3651 if (B_neigs) { 3652 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size)); 3653 3654 if (pcbddc->dbg_flag > 1) { 3655 PetscInt ii; 3656 for (ii=0;ii<B_neigs;ii++) { 3657 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n",ii,B_neigs,B_N)); 3658 for (j=0;j<B_N;j++) { 3659 #if defined(PETSC_USE_COMPLEX) 3660 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3661 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3662 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",(double)r,(double)c)); 3663 #else 3664 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",(double)PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]))); 3665 #endif 3666 } 3667 } 3668 } 3669 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size)); 3670 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3671 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3672 cum++; 3673 } 3674 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i],&idxs)); 3675 /* shift for next computation */ 3676 cumarray += subset_size*subset_size; 3677 } 3678 if (pcbddc->dbg_flag) { 3679 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3680 } 3681 3682 if (mss) { 3683 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3684 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3685 /* destroy matrices (junk) */ 3686 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3687 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3688 } 3689 if (allocated_S_St) { 3690 PetscCall(PetscFree2(S,St)); 3691 } 3692 PetscCall(PetscFree5(eigv,eigs,work,B_iwork,B_ifail)); 3693 #if defined(PETSC_USE_COMPLEX) 3694 PetscCall(PetscFree(rwork)); 3695 #endif 3696 if (pcbddc->dbg_flag) { 3697 PetscInt maxneigs_r; 3698 PetscCall(MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc))); 3699 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %" PetscInt_FMT "\n",maxneigs_r)); 3700 } 3701 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3702 PetscFunctionReturn(0); 3703 } 3704 3705 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3706 { 3707 PetscScalar *coarse_submat_vals; 3708 3709 PetscFunctionBegin; 3710 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3711 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3712 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3713 3714 /* Setup local neumann solver ksp_R */ 3715 /* PCBDDCSetUpLocalScatters should be called first! */ 3716 PetscCall(PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE)); 3717 3718 /* 3719 Setup local correction and local part of coarse basis. 3720 Gives back the dense local part of the coarse matrix in column major ordering 3721 */ 3722 PetscCall(PCBDDCSetUpCorrection(pc,&coarse_submat_vals)); 3723 3724 /* Compute total number of coarse nodes and setup coarse solver */ 3725 PetscCall(PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals)); 3726 3727 /* free */ 3728 PetscCall(PetscFree(coarse_submat_vals)); 3729 PetscFunctionReturn(0); 3730 } 3731 3732 PetscErrorCode PCBDDCResetCustomization(PC pc) 3733 { 3734 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3735 3736 PetscFunctionBegin; 3737 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3738 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3739 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3740 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3741 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3742 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3743 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3744 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3745 PetscCall(PCBDDCSetDofsSplitting(pc,0,NULL)); 3746 PetscCall(PCBDDCSetDofsSplittingLocal(pc,0,NULL)); 3747 PetscFunctionReturn(0); 3748 } 3749 3750 PetscErrorCode PCBDDCResetTopography(PC pc) 3751 { 3752 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3753 PetscInt i; 3754 3755 PetscFunctionBegin; 3756 PetscCall(MatDestroy(&pcbddc->nedcG)); 3757 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3758 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3759 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3760 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3761 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3762 PetscCall(VecDestroy(&pcbddc->work_change)); 3763 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3764 PetscCall(MatDestroy(&pcbddc->divudotp)); 3765 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3766 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3767 for (i=0;i<pcbddc->n_local_subs;i++) { 3768 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3769 } 3770 pcbddc->n_local_subs = 0; 3771 PetscCall(PetscFree(pcbddc->local_subs)); 3772 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3773 pcbddc->graphanalyzed = PETSC_FALSE; 3774 pcbddc->recompute_topography = PETSC_TRUE; 3775 pcbddc->corner_selected = PETSC_FALSE; 3776 PetscFunctionReturn(0); 3777 } 3778 3779 PetscErrorCode PCBDDCResetSolvers(PC pc) 3780 { 3781 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3782 3783 PetscFunctionBegin; 3784 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3785 if (pcbddc->coarse_phi_B) { 3786 PetscScalar *array; 3787 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&array)); 3788 PetscCall(PetscFree(array)); 3789 } 3790 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3791 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3792 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3793 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3794 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3795 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3796 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3797 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3798 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3799 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3800 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3801 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3802 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3803 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3804 PetscCall(KSPReset(pcbddc->ksp_D)); 3805 PetscCall(KSPReset(pcbddc->ksp_R)); 3806 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3807 PetscCall(MatDestroy(&pcbddc->local_mat)); 3808 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3809 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 3810 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3811 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3812 PetscCall(MatDestroy(&pcbddc->benign_change)); 3813 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3814 PetscCall(PCBDDCBenignShellMat(pc,PETSC_TRUE)); 3815 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3816 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3817 if (pcbddc->benign_zerodiag_subs) { 3818 PetscInt i; 3819 for (i=0;i<pcbddc->benign_n;i++) { 3820 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3821 } 3822 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3823 } 3824 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 3825 PetscFunctionReturn(0); 3826 } 3827 3828 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3829 { 3830 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3831 PC_IS *pcis = (PC_IS*)pc->data; 3832 VecType impVecType; 3833 PetscInt n_constraints,n_R,old_size; 3834 3835 PetscFunctionBegin; 3836 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3837 n_R = pcis->n - pcbddc->n_vertices; 3838 PetscCall(VecGetType(pcis->vec1_N,&impVecType)); 3839 /* local work vectors (try to avoid unneeded work)*/ 3840 /* R nodes */ 3841 old_size = -1; 3842 if (pcbddc->vec1_R) { 3843 PetscCall(VecGetSize(pcbddc->vec1_R,&old_size)); 3844 } 3845 if (n_R != old_size) { 3846 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3847 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3848 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R)); 3849 PetscCall(VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R)); 3850 PetscCall(VecSetType(pcbddc->vec1_R,impVecType)); 3851 PetscCall(VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R)); 3852 } 3853 /* local primal dofs */ 3854 old_size = -1; 3855 if (pcbddc->vec1_P) { 3856 PetscCall(VecGetSize(pcbddc->vec1_P,&old_size)); 3857 } 3858 if (pcbddc->local_primal_size != old_size) { 3859 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3860 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P)); 3861 PetscCall(VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size)); 3862 PetscCall(VecSetType(pcbddc->vec1_P,impVecType)); 3863 } 3864 /* local explicit constraints */ 3865 old_size = -1; 3866 if (pcbddc->vec1_C) { 3867 PetscCall(VecGetSize(pcbddc->vec1_C,&old_size)); 3868 } 3869 if (n_constraints && n_constraints != old_size) { 3870 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3871 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C)); 3872 PetscCall(VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints)); 3873 PetscCall(VecSetType(pcbddc->vec1_C,impVecType)); 3874 } 3875 PetscFunctionReturn(0); 3876 } 3877 3878 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3879 { 3880 /* pointers to pcis and pcbddc */ 3881 PC_IS* pcis = (PC_IS*)pc->data; 3882 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3883 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3884 /* submatrices of local problem */ 3885 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3886 /* submatrices of local coarse problem */ 3887 Mat S_VV,S_CV,S_VC,S_CC; 3888 /* working matrices */ 3889 Mat C_CR; 3890 /* additional working stuff */ 3891 PC pc_R; 3892 Mat F,Brhs = NULL; 3893 Vec dummy_vec; 3894 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3895 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3896 PetscScalar *work; 3897 PetscInt *idx_V_B; 3898 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3899 PetscInt i,n_R,n_D,n_B; 3900 PetscScalar one=1.0,m_one=-1.0; 3901 3902 PetscFunctionBegin; 3903 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3904 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 3905 3906 /* Set Non-overlapping dimensions */ 3907 n_vertices = pcbddc->n_vertices; 3908 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3909 n_B = pcis->n_B; 3910 n_D = pcis->n - n_B; 3911 n_R = pcis->n - n_vertices; 3912 3913 /* vertices in boundary numbering */ 3914 PetscCall(PetscMalloc1(n_vertices,&idx_V_B)); 3915 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B)); 3916 PetscCheck(i == n_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT,n_vertices,i); 3917 3918 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3919 PetscCall(PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals)); 3920 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV)); 3921 PetscCall(MatDenseSetLDA(S_VV,pcbddc->local_primal_size)); 3922 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV)); 3923 PetscCall(MatDenseSetLDA(S_CV,pcbddc->local_primal_size)); 3924 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC)); 3925 PetscCall(MatDenseSetLDA(S_VC,pcbddc->local_primal_size)); 3926 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC)); 3927 PetscCall(MatDenseSetLDA(S_CC,pcbddc->local_primal_size)); 3928 3929 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3930 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_R)); 3931 PetscCall(PCSetUp(pc_R)); 3932 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU)); 3933 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL)); 3934 lda_rhs = n_R; 3935 need_benign_correction = PETSC_FALSE; 3936 if (isLU || isCHOL) { 3937 PetscCall(PCFactorGetMatrix(pc_R,&F)); 3938 } else if (sub_schurs && sub_schurs->reuse_solver) { 3939 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3940 MatFactorType type; 3941 3942 F = reuse_solver->F; 3943 PetscCall(MatGetFactorType(F,&type)); 3944 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3945 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3946 PetscCall(MatGetSize(F,&lda_rhs,NULL)); 3947 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3948 } else F = NULL; 3949 3950 /* determine if we can use a sparse right-hand side */ 3951 sparserhs = PETSC_FALSE; 3952 if (F) { 3953 MatSolverType solver; 3954 3955 PetscCall(MatFactorGetSolverType(F,&solver)); 3956 PetscCall(PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs)); 3957 } 3958 3959 /* allocate workspace */ 3960 n = 0; 3961 if (n_constraints) { 3962 n += lda_rhs*n_constraints; 3963 } 3964 if (n_vertices) { 3965 n = PetscMax(2*lda_rhs*n_vertices,n); 3966 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3967 } 3968 if (!pcbddc->symmetric_primal) { 3969 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3970 } 3971 PetscCall(PetscMalloc1(n,&work)); 3972 3973 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3974 dummy_vec = NULL; 3975 if (need_benign_correction && lda_rhs != n_R && F) { 3976 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec)); 3977 PetscCall(VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE)); 3978 PetscCall(VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name)); 3979 } 3980 3981 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3982 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3983 3984 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3985 if (n_constraints) { 3986 Mat M3,C_B; 3987 IS is_aux; 3988 3989 /* Extract constraints on R nodes: C_{CR} */ 3990 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux)); 3991 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR)); 3992 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 3993 3994 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3995 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3996 if (!sparserhs) { 3997 PetscCall(PetscArrayzero(work,lda_rhs*n_constraints)); 3998 for (i=0;i<n_constraints;i++) { 3999 const PetscScalar *row_cmat_values; 4000 const PetscInt *row_cmat_indices; 4001 PetscInt size_of_constraint,j; 4002 4003 PetscCall(MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 4004 for (j=0;j<size_of_constraint;j++) { 4005 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4006 } 4007 PetscCall(MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 4008 } 4009 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs)); 4010 } else { 4011 Mat tC_CR; 4012 4013 PetscCall(MatScale(C_CR,-1.0)); 4014 if (lda_rhs != n_R) { 4015 PetscScalar *aa; 4016 PetscInt r,*ii,*jj; 4017 PetscBool done; 4018 4019 PetscCall(MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4020 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4021 PetscCall(MatSeqAIJGetArray(C_CR,&aa)); 4022 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR)); 4023 PetscCall(MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4024 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4025 } else { 4026 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4027 tC_CR = C_CR; 4028 } 4029 PetscCall(MatCreateTranspose(tC_CR,&Brhs)); 4030 PetscCall(MatDestroy(&tC_CR)); 4031 } 4032 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R)); 4033 if (F) { 4034 if (need_benign_correction) { 4035 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4036 4037 /* rhs is already zero on interior dofs, no need to change the rhs */ 4038 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n)); 4039 } 4040 PetscCall(MatMatSolve(F,Brhs,local_auxmat2_R)); 4041 if (need_benign_correction) { 4042 PetscScalar *marr; 4043 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4044 4045 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4046 if (lda_rhs != n_R) { 4047 for (i=0;i<n_constraints;i++) { 4048 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4049 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4050 PetscCall(VecResetArray(dummy_vec)); 4051 } 4052 } else { 4053 for (i=0;i<n_constraints;i++) { 4054 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4055 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4056 PetscCall(VecResetArray(pcbddc->vec1_R)); 4057 } 4058 } 4059 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4060 } 4061 } else { 4062 PetscScalar *marr; 4063 4064 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4065 for (i=0;i<n_constraints;i++) { 4066 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4067 PetscCall(VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs)); 4068 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4069 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4070 PetscCall(VecResetArray(pcbddc->vec1_R)); 4071 PetscCall(VecResetArray(pcbddc->vec2_R)); 4072 } 4073 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4074 } 4075 if (sparserhs) { 4076 PetscCall(MatScale(C_CR,-1.0)); 4077 } 4078 PetscCall(MatDestroy(&Brhs)); 4079 if (!pcbddc->switch_static) { 4080 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2)); 4081 for (i=0;i<n_constraints;i++) { 4082 Vec r, b; 4083 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R,i,&r)); 4084 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2,i,&b)); 4085 PetscCall(VecScatterBegin(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4086 PetscCall(VecScatterEnd(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4087 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2,i,&b)); 4088 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R,i,&r)); 4089 } 4090 PetscCall(MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4091 } else { 4092 if (lda_rhs != n_R) { 4093 IS dummy; 4094 4095 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy)); 4096 PetscCall(MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2)); 4097 PetscCall(ISDestroy(&dummy)); 4098 } else { 4099 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4100 pcbddc->local_auxmat2 = local_auxmat2_R; 4101 } 4102 PetscCall(MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4103 } 4104 PetscCall(ISDestroy(&is_aux)); 4105 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4106 PetscCall(MatScale(M3,m_one)); 4107 if (isCHOL) { 4108 PetscCall(MatCholeskyFactor(M3,NULL,NULL)); 4109 } else { 4110 PetscCall(MatLUFactor(M3,NULL,NULL,NULL)); 4111 } 4112 PetscCall(MatSeqDenseInvertFactors_Private(M3)); 4113 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4114 PetscCall(MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1)); 4115 PetscCall(MatDestroy(&C_B)); 4116 PetscCall(MatCopy(M3,S_CC,SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4117 PetscCall(MatDestroy(&M3)); 4118 } 4119 4120 /* Get submatrices from subdomain matrix */ 4121 if (n_vertices) { 4122 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4123 PetscBool oldpin; 4124 #endif 4125 PetscBool isaij; 4126 IS is_aux; 4127 4128 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4129 IS tis; 4130 4131 PetscCall(ISDuplicate(pcbddc->is_R_local,&tis)); 4132 PetscCall(ISSort(tis)); 4133 PetscCall(ISComplement(tis,0,pcis->n,&is_aux)); 4134 PetscCall(ISDestroy(&tis)); 4135 } else { 4136 PetscCall(ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux)); 4137 } 4138 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4139 oldpin = pcbddc->local_mat->boundtocpu; 4140 #endif 4141 PetscCall(MatBindToCPU(pcbddc->local_mat,PETSC_TRUE)); 4142 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV)); 4143 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR)); 4144 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij)); 4145 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4146 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4147 } 4148 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV)); 4149 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4150 PetscCall(MatBindToCPU(pcbddc->local_mat,oldpin)); 4151 #endif 4152 PetscCall(ISDestroy(&is_aux)); 4153 } 4154 4155 /* Matrix of coarse basis functions (local) */ 4156 if (pcbddc->coarse_phi_B) { 4157 PetscInt on_B,on_primal,on_D=n_D; 4158 if (pcbddc->coarse_phi_D) { 4159 PetscCall(MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL)); 4160 } 4161 PetscCall(MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal)); 4162 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4163 PetscScalar *marray; 4164 4165 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&marray)); 4166 PetscCall(PetscFree(marray)); 4167 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4168 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4169 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4170 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4171 } 4172 } 4173 4174 if (!pcbddc->coarse_phi_B) { 4175 PetscScalar *marr; 4176 4177 /* memory size */ 4178 n = n_B*pcbddc->local_primal_size; 4179 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4180 if (!pcbddc->symmetric_primal) n *= 2; 4181 PetscCall(PetscCalloc1(n,&marr)); 4182 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B)); 4183 marr += n_B*pcbddc->local_primal_size; 4184 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4185 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D)); 4186 marr += n_D*pcbddc->local_primal_size; 4187 } 4188 if (!pcbddc->symmetric_primal) { 4189 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B)); 4190 marr += n_B*pcbddc->local_primal_size; 4191 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4192 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D)); 4193 } 4194 } else { 4195 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4196 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4197 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4198 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4199 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4200 } 4201 } 4202 } 4203 4204 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4205 p0_lidx_I = NULL; 4206 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4207 const PetscInt *idxs; 4208 4209 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 4210 PetscCall(PetscMalloc1(pcbddc->benign_n,&p0_lidx_I)); 4211 for (i=0;i<pcbddc->benign_n;i++) { 4212 PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i])); 4213 } 4214 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 4215 } 4216 4217 /* vertices */ 4218 if (n_vertices) { 4219 PetscBool restoreavr = PETSC_FALSE; 4220 4221 PetscCall(MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV)); 4222 4223 if (n_R) { 4224 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4225 PetscBLASInt B_N,B_one = 1; 4226 const PetscScalar *x; 4227 PetscScalar *y; 4228 4229 PetscCall(MatScale(A_RV,m_one)); 4230 if (need_benign_correction) { 4231 ISLocalToGlobalMapping RtoN; 4232 IS is_p0; 4233 PetscInt *idxs_p0,n; 4234 4235 PetscCall(PetscMalloc1(pcbddc->benign_n,&idxs_p0)); 4236 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN)); 4237 PetscCall(ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0)); 4238 PetscCheck(n == pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT,n,pcbddc->benign_n); 4239 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4240 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0)); 4241 PetscCall(MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr)); 4242 PetscCall(ISDestroy(&is_p0)); 4243 } 4244 4245 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV)); 4246 if (!sparserhs || need_benign_correction) { 4247 if (lda_rhs == n_R) { 4248 PetscCall(MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV)); 4249 } else { 4250 PetscScalar *av,*array; 4251 const PetscInt *xadj,*adjncy; 4252 PetscInt n; 4253 PetscBool flg_row; 4254 4255 array = work+lda_rhs*n_vertices; 4256 PetscCall(PetscArrayzero(array,lda_rhs*n_vertices)); 4257 PetscCall(MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV)); 4258 PetscCall(MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4259 PetscCall(MatSeqAIJGetArray(A_RV,&av)); 4260 for (i=0;i<n;i++) { 4261 PetscInt j; 4262 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4263 } 4264 PetscCall(MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4265 PetscCall(MatDestroy(&A_RV)); 4266 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV)); 4267 } 4268 if (need_benign_correction) { 4269 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4270 PetscScalar *marr; 4271 4272 PetscCall(MatDenseGetArray(A_RV,&marr)); 4273 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4274 4275 | 0 0 0 | (V) 4276 L = | 0 0 -1 | (P-p0) 4277 | 0 0 -1 | (p0) 4278 4279 */ 4280 for (i=0;i<reuse_solver->benign_n;i++) { 4281 const PetscScalar *vals; 4282 const PetscInt *idxs,*idxs_zero; 4283 PetscInt n,j,nz; 4284 4285 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4286 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4287 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4288 for (j=0;j<n;j++) { 4289 PetscScalar val = vals[j]; 4290 PetscInt k,col = idxs[j]; 4291 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4292 } 4293 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4294 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4295 } 4296 PetscCall(MatDenseRestoreArray(A_RV,&marr)); 4297 } 4298 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4299 Brhs = A_RV; 4300 } else { 4301 Mat tA_RVT,A_RVT; 4302 4303 if (!pcbddc->symmetric_primal) { 4304 /* A_RV already scaled by -1 */ 4305 PetscCall(MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT)); 4306 } else { 4307 restoreavr = PETSC_TRUE; 4308 PetscCall(MatScale(A_VR,-1.0)); 4309 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4310 A_RVT = A_VR; 4311 } 4312 if (lda_rhs != n_R) { 4313 PetscScalar *aa; 4314 PetscInt r,*ii,*jj; 4315 PetscBool done; 4316 4317 PetscCall(MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4318 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4319 PetscCall(MatSeqAIJGetArray(A_RVT,&aa)); 4320 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT)); 4321 PetscCall(MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4322 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4323 } else { 4324 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4325 tA_RVT = A_RVT; 4326 } 4327 PetscCall(MatCreateTranspose(tA_RVT,&Brhs)); 4328 PetscCall(MatDestroy(&tA_RVT)); 4329 PetscCall(MatDestroy(&A_RVT)); 4330 } 4331 if (F) { 4332 /* need to correct the rhs */ 4333 if (need_benign_correction) { 4334 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4335 PetscScalar *marr; 4336 4337 PetscCall(MatDenseGetArray(Brhs,&marr)); 4338 if (lda_rhs != n_R) { 4339 for (i=0;i<n_vertices;i++) { 4340 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4341 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE)); 4342 PetscCall(VecResetArray(dummy_vec)); 4343 } 4344 } else { 4345 for (i=0;i<n_vertices;i++) { 4346 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4347 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE)); 4348 PetscCall(VecResetArray(pcbddc->vec1_R)); 4349 } 4350 } 4351 PetscCall(MatDenseRestoreArray(Brhs,&marr)); 4352 } 4353 PetscCall(MatMatSolve(F,Brhs,A_RRmA_RV)); 4354 if (restoreavr) { 4355 PetscCall(MatScale(A_VR,-1.0)); 4356 } 4357 /* need to correct the solution */ 4358 if (need_benign_correction) { 4359 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4360 PetscScalar *marr; 4361 4362 PetscCall(MatDenseGetArray(A_RRmA_RV,&marr)); 4363 if (lda_rhs != n_R) { 4364 for (i=0;i<n_vertices;i++) { 4365 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4366 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4367 PetscCall(VecResetArray(dummy_vec)); 4368 } 4369 } else { 4370 for (i=0;i<n_vertices;i++) { 4371 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4372 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4373 PetscCall(VecResetArray(pcbddc->vec1_R)); 4374 } 4375 } 4376 PetscCall(MatDenseRestoreArray(A_RRmA_RV,&marr)); 4377 } 4378 } else { 4379 PetscCall(MatDenseGetArray(Brhs,&y)); 4380 for (i=0;i<n_vertices;i++) { 4381 PetscCall(VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs)); 4382 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs)); 4383 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4384 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4385 PetscCall(VecResetArray(pcbddc->vec1_R)); 4386 PetscCall(VecResetArray(pcbddc->vec2_R)); 4387 } 4388 PetscCall(MatDenseRestoreArray(Brhs,&y)); 4389 } 4390 PetscCall(MatDestroy(&A_RV)); 4391 PetscCall(MatDestroy(&Brhs)); 4392 /* S_VV and S_CV */ 4393 if (n_constraints) { 4394 Mat B; 4395 4396 PetscCall(PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices)); 4397 for (i=0;i<n_vertices;i++) { 4398 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4399 PetscCall(VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B)); 4400 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4401 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4402 PetscCall(VecResetArray(pcis->vec1_B)); 4403 PetscCall(VecResetArray(pcbddc->vec1_R)); 4404 } 4405 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B)); 4406 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4407 PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV)); 4408 PetscCall(MatProductSetType(S_CV,MATPRODUCT_AB)); 4409 PetscCall(MatProductSetFromOptions(S_CV)); 4410 PetscCall(MatProductSymbolic(S_CV)); 4411 PetscCall(MatProductNumeric(S_CV)); 4412 PetscCall(MatProductClear(S_CV)); 4413 4414 PetscCall(MatDestroy(&B)); 4415 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B)); 4416 /* Reuse B = local_auxmat2_R * S_CV */ 4417 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B)); 4418 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4419 PetscCall(MatProductSetFromOptions(B)); 4420 PetscCall(MatProductSymbolic(B)); 4421 PetscCall(MatProductNumeric(B)); 4422 4423 PetscCall(MatScale(S_CV,m_one)); 4424 PetscCall(PetscBLASIntCast(lda_rhs*n_vertices,&B_N)); 4425 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4426 PetscCall(MatDestroy(&B)); 4427 } 4428 if (lda_rhs != n_R) { 4429 PetscCall(MatDestroy(&A_RRmA_RV)); 4430 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV)); 4431 PetscCall(MatDenseSetLDA(A_RRmA_RV,lda_rhs)); 4432 } 4433 PetscCall(MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt)); 4434 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4435 if (need_benign_correction) { 4436 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4437 PetscScalar *marr,*sums; 4438 4439 PetscCall(PetscMalloc1(n_vertices,&sums)); 4440 PetscCall(MatDenseGetArray(S_VVt,&marr)); 4441 for (i=0;i<reuse_solver->benign_n;i++) { 4442 const PetscScalar *vals; 4443 const PetscInt *idxs,*idxs_zero; 4444 PetscInt n,j,nz; 4445 4446 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4447 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4448 for (j=0;j<n_vertices;j++) { 4449 PetscInt k; 4450 sums[j] = 0.; 4451 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4452 } 4453 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4454 for (j=0;j<n;j++) { 4455 PetscScalar val = vals[j]; 4456 PetscInt k; 4457 for (k=0;k<n_vertices;k++) { 4458 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4459 } 4460 } 4461 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4462 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4463 } 4464 PetscCall(PetscFree(sums)); 4465 PetscCall(MatDenseRestoreArray(S_VVt,&marr)); 4466 PetscCall(MatDestroy(&A_RV_bcorr)); 4467 } 4468 PetscCall(MatDestroy(&A_RRmA_RV)); 4469 PetscCall(PetscBLASIntCast(n_vertices*n_vertices,&B_N)); 4470 PetscCall(MatDenseGetArrayRead(A_VV,&x)); 4471 PetscCall(MatDenseGetArray(S_VVt,&y)); 4472 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4473 PetscCall(MatDenseRestoreArrayRead(A_VV,&x)); 4474 PetscCall(MatDenseRestoreArray(S_VVt,&y)); 4475 PetscCall(MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN)); 4476 PetscCall(MatDestroy(&S_VVt)); 4477 } else { 4478 PetscCall(MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN)); 4479 } 4480 PetscCall(MatDestroy(&A_VV)); 4481 4482 /* coarse basis functions */ 4483 for (i=0;i<n_vertices;i++) { 4484 Vec v; 4485 PetscScalar one = 1.0,zero = 0.0; 4486 4487 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4488 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i,&v)); 4489 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4490 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4491 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4492 PetscMPIInt rank; 4493 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),&rank)); 4494 PetscCheck(rank <= 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4495 } 4496 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4497 PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4498 PetscCall(VecAssemblyEnd(v)); 4499 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i,&v)); 4500 4501 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4502 PetscInt j; 4503 4504 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i,&v)); 4505 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4506 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4507 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4508 PetscMPIInt rank; 4509 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),&rank)); 4510 PetscCheck(rank <= 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4511 } 4512 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4513 PetscCall(VecAssemblyBegin(v)); 4514 PetscCall(VecAssemblyEnd(v)); 4515 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i,&v)); 4516 } 4517 PetscCall(VecResetArray(pcbddc->vec1_R)); 4518 } 4519 /* if n_R == 0 the object is not destroyed */ 4520 PetscCall(MatDestroy(&A_RV)); 4521 } 4522 PetscCall(VecDestroy(&dummy_vec)); 4523 4524 if (n_constraints) { 4525 Mat B; 4526 4527 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B)); 4528 PetscCall(MatScale(S_CC,m_one)); 4529 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B)); 4530 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4531 PetscCall(MatProductSetFromOptions(B)); 4532 PetscCall(MatProductSymbolic(B)); 4533 PetscCall(MatProductNumeric(B)); 4534 4535 PetscCall(MatScale(S_CC,m_one)); 4536 if (n_vertices) { 4537 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4538 PetscCall(MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC)); 4539 } else { 4540 Mat S_VCt; 4541 4542 if (lda_rhs != n_R) { 4543 PetscCall(MatDestroy(&B)); 4544 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B)); 4545 PetscCall(MatDenseSetLDA(B,lda_rhs)); 4546 } 4547 PetscCall(MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt)); 4548 PetscCall(MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN)); 4549 PetscCall(MatDestroy(&S_VCt)); 4550 } 4551 } 4552 PetscCall(MatDestroy(&B)); 4553 /* coarse basis functions */ 4554 for (i=0;i<n_constraints;i++) { 4555 Vec v; 4556 4557 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4558 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4559 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4560 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4561 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4562 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4563 PetscInt j; 4564 PetscScalar zero = 0.0; 4565 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4566 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4567 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4568 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4569 PetscCall(VecAssemblyBegin(v)); 4570 PetscCall(VecAssemblyEnd(v)); 4571 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4572 } 4573 PetscCall(VecResetArray(pcbddc->vec1_R)); 4574 } 4575 } 4576 if (n_constraints) { 4577 PetscCall(MatDestroy(&local_auxmat2_R)); 4578 } 4579 PetscCall(PetscFree(p0_lidx_I)); 4580 4581 /* coarse matrix entries relative to B_0 */ 4582 if (pcbddc->benign_n) { 4583 Mat B0_B,B0_BPHI; 4584 IS is_dummy; 4585 const PetscScalar *data; 4586 PetscInt j; 4587 4588 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4589 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4590 PetscCall(ISDestroy(&is_dummy)); 4591 PetscCall(MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4592 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4593 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data)); 4594 for (j=0;j<pcbddc->benign_n;j++) { 4595 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4596 for (i=0;i<pcbddc->local_primal_size;i++) { 4597 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4598 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4599 } 4600 } 4601 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data)); 4602 PetscCall(MatDestroy(&B0_B)); 4603 PetscCall(MatDestroy(&B0_BPHI)); 4604 } 4605 4606 /* compute other basis functions for non-symmetric problems */ 4607 if (!pcbddc->symmetric_primal) { 4608 Mat B_V=NULL,B_C=NULL; 4609 PetscScalar *marray; 4610 4611 if (n_constraints) { 4612 Mat S_CCT,C_CRT; 4613 4614 PetscCall(MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT)); 4615 PetscCall(MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT)); 4616 PetscCall(MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C)); 4617 PetscCall(MatDestroy(&S_CCT)); 4618 if (n_vertices) { 4619 Mat S_VCT; 4620 4621 PetscCall(MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT)); 4622 PetscCall(MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V)); 4623 PetscCall(MatDestroy(&S_VCT)); 4624 } 4625 PetscCall(MatDestroy(&C_CRT)); 4626 } else { 4627 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V)); 4628 } 4629 if (n_vertices && n_R) { 4630 PetscScalar *av,*marray; 4631 const PetscInt *xadj,*adjncy; 4632 PetscInt n; 4633 PetscBool flg_row; 4634 4635 /* B_V = B_V - A_VR^T */ 4636 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4637 PetscCall(MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4638 PetscCall(MatSeqAIJGetArray(A_VR,&av)); 4639 PetscCall(MatDenseGetArray(B_V,&marray)); 4640 for (i=0;i<n;i++) { 4641 PetscInt j; 4642 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4643 } 4644 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4645 PetscCall(MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4646 PetscCall(MatDestroy(&A_VR)); 4647 } 4648 4649 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4650 if (n_vertices) { 4651 PetscCall(MatDenseGetArray(B_V,&marray)); 4652 for (i=0;i<n_vertices;i++) { 4653 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+i*n_R)); 4654 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4655 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4656 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4657 PetscCall(VecResetArray(pcbddc->vec1_R)); 4658 PetscCall(VecResetArray(pcbddc->vec2_R)); 4659 } 4660 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4661 } 4662 if (B_C) { 4663 PetscCall(MatDenseGetArray(B_C,&marray)); 4664 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4665 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R)); 4666 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4667 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4668 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4669 PetscCall(VecResetArray(pcbddc->vec1_R)); 4670 PetscCall(VecResetArray(pcbddc->vec2_R)); 4671 } 4672 PetscCall(MatDenseRestoreArray(B_C,&marray)); 4673 } 4674 /* coarse basis functions */ 4675 for (i=0;i<pcbddc->local_primal_size;i++) { 4676 Vec v; 4677 4678 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*n_R)); 4679 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B,i,&v)); 4680 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4681 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4682 if (i<n_vertices) { 4683 PetscScalar one = 1.0; 4684 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4685 PetscCall(VecAssemblyBegin(v)); 4686 PetscCall(VecAssemblyEnd(v)); 4687 } 4688 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B,i,&v)); 4689 4690 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4691 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D,i,&v)); 4692 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4693 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4694 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D,i,&v)); 4695 } 4696 PetscCall(VecResetArray(pcbddc->vec1_R)); 4697 } 4698 PetscCall(MatDestroy(&B_V)); 4699 PetscCall(MatDestroy(&B_C)); 4700 } 4701 4702 /* free memory */ 4703 PetscCall(PetscFree(idx_V_B)); 4704 PetscCall(MatDestroy(&S_VV)); 4705 PetscCall(MatDestroy(&S_CV)); 4706 PetscCall(MatDestroy(&S_VC)); 4707 PetscCall(MatDestroy(&S_CC)); 4708 PetscCall(PetscFree(work)); 4709 if (n_vertices) { 4710 PetscCall(MatDestroy(&A_VR)); 4711 } 4712 if (n_constraints) { 4713 PetscCall(MatDestroy(&C_CR)); 4714 } 4715 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 4716 4717 /* Checking coarse_sub_mat and coarse basis functios */ 4718 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4719 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4720 if (pcbddc->dbg_flag) { 4721 Mat coarse_sub_mat; 4722 Mat AUXMAT,TM1,TM2,TM3,TM4; 4723 Mat coarse_phi_D,coarse_phi_B; 4724 Mat coarse_psi_D,coarse_psi_B; 4725 Mat A_II,A_BB,A_IB,A_BI; 4726 Mat C_B,CPHI; 4727 IS is_dummy; 4728 Vec mones; 4729 MatType checkmattype=MATSEQAIJ; 4730 PetscReal real_value; 4731 4732 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4733 Mat A; 4734 PetscCall(PCBDDCBenignProject(pc,NULL,NULL,&A)); 4735 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II)); 4736 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB)); 4737 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI)); 4738 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB)); 4739 PetscCall(MatDestroy(&A)); 4740 } else { 4741 PetscCall(MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II)); 4742 PetscCall(MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB)); 4743 PetscCall(MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI)); 4744 PetscCall(MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB)); 4745 } 4746 PetscCall(MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D)); 4747 PetscCall(MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B)); 4748 if (!pcbddc->symmetric_primal) { 4749 PetscCall(MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D)); 4750 PetscCall(MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B)); 4751 } 4752 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat)); 4753 4754 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 4755 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal)); 4756 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4757 if (!pcbddc->symmetric_primal) { 4758 PetscCall(MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4759 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1)); 4760 PetscCall(MatDestroy(&AUXMAT)); 4761 PetscCall(MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4762 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2)); 4763 PetscCall(MatDestroy(&AUXMAT)); 4764 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4765 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4766 PetscCall(MatDestroy(&AUXMAT)); 4767 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4768 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4769 PetscCall(MatDestroy(&AUXMAT)); 4770 } else { 4771 PetscCall(MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1)); 4772 PetscCall(MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2)); 4773 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4774 PetscCall(MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4775 PetscCall(MatDestroy(&AUXMAT)); 4776 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4777 PetscCall(MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4778 PetscCall(MatDestroy(&AUXMAT)); 4779 } 4780 PetscCall(MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN)); 4781 PetscCall(MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN)); 4782 PetscCall(MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN)); 4783 PetscCall(MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1)); 4784 if (pcbddc->benign_n) { 4785 Mat B0_B,B0_BPHI; 4786 const PetscScalar *data2; 4787 PetscScalar *data; 4788 PetscInt j; 4789 4790 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4791 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4792 PetscCall(MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4793 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4794 PetscCall(MatDenseGetArray(TM1,&data)); 4795 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data2)); 4796 for (j=0;j<pcbddc->benign_n;j++) { 4797 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4798 for (i=0;i<pcbddc->local_primal_size;i++) { 4799 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4800 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4801 } 4802 } 4803 PetscCall(MatDenseRestoreArray(TM1,&data)); 4804 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data2)); 4805 PetscCall(MatDestroy(&B0_B)); 4806 PetscCall(ISDestroy(&is_dummy)); 4807 PetscCall(MatDestroy(&B0_BPHI)); 4808 } 4809 #if 0 4810 { 4811 PetscViewer viewer; 4812 char filename[256]; 4813 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4814 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4815 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4816 PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4817 PetscCall(MatView(coarse_sub_mat,viewer)); 4818 PetscCall(PetscObjectSetName((PetscObject)TM1,"projected")); 4819 PetscCall(MatView(TM1,viewer)); 4820 if (pcbddc->coarse_phi_B) { 4821 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4822 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 4823 } 4824 if (pcbddc->coarse_phi_D) { 4825 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4826 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 4827 } 4828 if (pcbddc->coarse_psi_B) { 4829 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4830 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 4831 } 4832 if (pcbddc->coarse_psi_D) { 4833 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4834 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 4835 } 4836 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4837 PetscCall(MatView(pcbddc->local_mat,viewer)); 4838 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4839 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 4840 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4841 PetscCall(ISView(pcis->is_I_local,viewer)); 4842 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4843 PetscCall(ISView(pcis->is_B_local,viewer)); 4844 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4845 PetscCall(ISView(pcbddc->is_R_local,viewer)); 4846 PetscCall(PetscViewerDestroy(&viewer)); 4847 } 4848 #endif 4849 PetscCall(MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN)); 4850 PetscCall(MatNorm(TM1,NORM_FROBENIUS,&real_value)); 4851 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4852 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4853 4854 /* check constraints */ 4855 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy)); 4856 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 4857 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4858 PetscCall(MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4859 } else { 4860 PetscScalar *data; 4861 Mat tmat; 4862 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&data)); 4863 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat)); 4864 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B,&data)); 4865 PetscCall(MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4866 PetscCall(MatDestroy(&tmat)); 4867 } 4868 PetscCall(MatCreateVecs(CPHI,&mones,NULL)); 4869 PetscCall(VecSet(mones,-1.0)); 4870 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4871 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4872 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4873 if (!pcbddc->symmetric_primal) { 4874 PetscCall(MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI)); 4875 PetscCall(VecSet(mones,-1.0)); 4876 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4877 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4878 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4879 } 4880 PetscCall(MatDestroy(&C_B)); 4881 PetscCall(MatDestroy(&CPHI)); 4882 PetscCall(ISDestroy(&is_dummy)); 4883 PetscCall(VecDestroy(&mones)); 4884 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4885 PetscCall(MatDestroy(&A_II)); 4886 PetscCall(MatDestroy(&A_BB)); 4887 PetscCall(MatDestroy(&A_IB)); 4888 PetscCall(MatDestroy(&A_BI)); 4889 PetscCall(MatDestroy(&TM1)); 4890 PetscCall(MatDestroy(&TM2)); 4891 PetscCall(MatDestroy(&TM3)); 4892 PetscCall(MatDestroy(&TM4)); 4893 PetscCall(MatDestroy(&coarse_phi_D)); 4894 PetscCall(MatDestroy(&coarse_phi_B)); 4895 if (!pcbddc->symmetric_primal) { 4896 PetscCall(MatDestroy(&coarse_psi_D)); 4897 PetscCall(MatDestroy(&coarse_psi_B)); 4898 } 4899 PetscCall(MatDestroy(&coarse_sub_mat)); 4900 } 4901 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4902 { 4903 PetscBool gpu; 4904 4905 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu)); 4906 if (gpu) { 4907 if (pcbddc->local_auxmat1) { 4908 PetscCall(MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1)); 4909 } 4910 if (pcbddc->local_auxmat2) { 4911 PetscCall(MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2)); 4912 } 4913 if (pcbddc->coarse_phi_B) { 4914 PetscCall(MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B)); 4915 } 4916 if (pcbddc->coarse_phi_D) { 4917 PetscCall(MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D)); 4918 } 4919 if (pcbddc->coarse_psi_B) { 4920 PetscCall(MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B)); 4921 } 4922 if (pcbddc->coarse_psi_D) { 4923 PetscCall(MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D)); 4924 } 4925 } 4926 } 4927 /* get back data */ 4928 *coarse_submat_vals_n = coarse_submat_vals; 4929 PetscFunctionReturn(0); 4930 } 4931 4932 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4933 { 4934 Mat *work_mat; 4935 IS isrow_s,iscol_s; 4936 PetscBool rsorted,csorted; 4937 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4938 4939 PetscFunctionBegin; 4940 PetscCall(ISSorted(isrow,&rsorted)); 4941 PetscCall(ISSorted(iscol,&csorted)); 4942 PetscCall(ISGetLocalSize(isrow,&rsize)); 4943 PetscCall(ISGetLocalSize(iscol,&csize)); 4944 4945 if (!rsorted) { 4946 const PetscInt *idxs; 4947 PetscInt *idxs_sorted,i; 4948 4949 PetscCall(PetscMalloc1(rsize,&idxs_perm_r)); 4950 PetscCall(PetscMalloc1(rsize,&idxs_sorted)); 4951 for (i=0;i<rsize;i++) { 4952 idxs_perm_r[i] = i; 4953 } 4954 PetscCall(ISGetIndices(isrow,&idxs)); 4955 PetscCall(PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r)); 4956 for (i=0;i<rsize;i++) { 4957 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4958 } 4959 PetscCall(ISRestoreIndices(isrow,&idxs)); 4960 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s)); 4961 } else { 4962 PetscCall(PetscObjectReference((PetscObject)isrow)); 4963 isrow_s = isrow; 4964 } 4965 4966 if (!csorted) { 4967 if (isrow == iscol) { 4968 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 4969 iscol_s = isrow_s; 4970 } else { 4971 const PetscInt *idxs; 4972 PetscInt *idxs_sorted,i; 4973 4974 PetscCall(PetscMalloc1(csize,&idxs_perm_c)); 4975 PetscCall(PetscMalloc1(csize,&idxs_sorted)); 4976 for (i=0;i<csize;i++) { 4977 idxs_perm_c[i] = i; 4978 } 4979 PetscCall(ISGetIndices(iscol,&idxs)); 4980 PetscCall(PetscSortIntWithPermutation(csize,idxs,idxs_perm_c)); 4981 for (i=0;i<csize;i++) { 4982 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4983 } 4984 PetscCall(ISRestoreIndices(iscol,&idxs)); 4985 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s)); 4986 } 4987 } else { 4988 PetscCall(PetscObjectReference((PetscObject)iscol)); 4989 iscol_s = iscol; 4990 } 4991 4992 PetscCall(MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat)); 4993 4994 if (!rsorted || !csorted) { 4995 Mat new_mat; 4996 IS is_perm_r,is_perm_c; 4997 4998 if (!rsorted) { 4999 PetscInt *idxs_r,i; 5000 PetscCall(PetscMalloc1(rsize,&idxs_r)); 5001 for (i=0;i<rsize;i++) { 5002 idxs_r[idxs_perm_r[i]] = i; 5003 } 5004 PetscCall(PetscFree(idxs_perm_r)); 5005 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r)); 5006 } else { 5007 PetscCall(ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r)); 5008 } 5009 PetscCall(ISSetPermutation(is_perm_r)); 5010 5011 if (!csorted) { 5012 if (isrow_s == iscol_s) { 5013 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5014 is_perm_c = is_perm_r; 5015 } else { 5016 PetscInt *idxs_c,i; 5017 PetscCheck(idxs_perm_c,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5018 PetscCall(PetscMalloc1(csize,&idxs_c)); 5019 for (i=0;i<csize;i++) { 5020 idxs_c[idxs_perm_c[i]] = i; 5021 } 5022 PetscCall(PetscFree(idxs_perm_c)); 5023 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c)); 5024 } 5025 } else { 5026 PetscCall(ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c)); 5027 } 5028 PetscCall(ISSetPermutation(is_perm_c)); 5029 5030 PetscCall(MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat)); 5031 PetscCall(MatDestroy(&work_mat[0])); 5032 work_mat[0] = new_mat; 5033 PetscCall(ISDestroy(&is_perm_r)); 5034 PetscCall(ISDestroy(&is_perm_c)); 5035 } 5036 5037 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5038 *B = work_mat[0]; 5039 PetscCall(MatDestroyMatrices(1,&work_mat)); 5040 PetscCall(ISDestroy(&isrow_s)); 5041 PetscCall(ISDestroy(&iscol_s)); 5042 PetscFunctionReturn(0); 5043 } 5044 5045 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5046 { 5047 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5048 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5049 Mat new_mat,lA; 5050 IS is_local,is_global; 5051 PetscInt local_size; 5052 PetscBool isseqaij; 5053 5054 PetscFunctionBegin; 5055 PetscCall(MatDestroy(&pcbddc->local_mat)); 5056 PetscCall(MatGetSize(matis->A,&local_size,NULL)); 5057 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local)); 5058 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping,is_local,&is_global)); 5059 PetscCall(ISDestroy(&is_local)); 5060 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat)); 5061 PetscCall(ISDestroy(&is_global)); 5062 5063 if (pcbddc->dbg_flag) { 5064 Vec x,x_change; 5065 PetscReal error; 5066 5067 PetscCall(MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change)); 5068 PetscCall(VecSetRandom(x,NULL)); 5069 PetscCall(MatMult(ChangeOfBasisMatrix,x,x_change)); 5070 PetscCall(VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5071 PetscCall(VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5072 PetscCall(MatMult(new_mat,matis->x,matis->y)); 5073 if (!pcbddc->change_interior) { 5074 const PetscScalar *x,*y,*v; 5075 PetscReal lerror = 0.; 5076 PetscInt i; 5077 5078 PetscCall(VecGetArrayRead(matis->x,&x)); 5079 PetscCall(VecGetArrayRead(matis->y,&y)); 5080 PetscCall(VecGetArrayRead(matis->counter,&v)); 5081 for (i=0;i<local_size;i++) 5082 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5083 lerror = PetscAbsScalar(x[i]-y[i]); 5084 PetscCall(VecRestoreArrayRead(matis->x,&x)); 5085 PetscCall(VecRestoreArrayRead(matis->y,&y)); 5086 PetscCall(VecRestoreArrayRead(matis->counter,&v)); 5087 PetscCall(MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)pc))); 5088 if (error > PETSC_SMALL) { 5089 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5090 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",(double)error); 5091 } else { 5092 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",(double)error); 5093 } 5094 } 5095 } 5096 PetscCall(VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5097 PetscCall(VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5098 PetscCall(VecAXPY(x,-1.0,x_change)); 5099 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 5100 if (error > PETSC_SMALL) { 5101 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5102 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",(double)error); 5103 } else { 5104 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",(double)error); 5105 } 5106 } 5107 PetscCall(VecDestroy(&x)); 5108 PetscCall(VecDestroy(&x_change)); 5109 } 5110 5111 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5112 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA)); 5113 5114 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5115 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij)); 5116 if (isseqaij) { 5117 PetscCall(MatDestroy(&pcbddc->local_mat)); 5118 PetscCall(MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5119 if (lA) { 5120 Mat work; 5121 PetscCall(MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5122 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5123 PetscCall(MatDestroy(&work)); 5124 } 5125 } else { 5126 Mat work_mat; 5127 5128 PetscCall(MatDestroy(&pcbddc->local_mat)); 5129 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5130 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5131 PetscCall(MatDestroy(&work_mat)); 5132 if (lA) { 5133 Mat work; 5134 PetscCall(MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5135 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5136 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5137 PetscCall(MatDestroy(&work)); 5138 } 5139 } 5140 if (matis->A->symmetric_set) { 5141 PetscCall(MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric)); 5142 #if !defined(PETSC_USE_COMPLEX) 5143 PetscCall(MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric)); 5144 #endif 5145 } 5146 PetscCall(MatDestroy(&new_mat)); 5147 PetscFunctionReturn(0); 5148 } 5149 5150 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5151 { 5152 PC_IS* pcis = (PC_IS*)(pc->data); 5153 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5154 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5155 PetscInt *idx_R_local=NULL; 5156 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5157 PetscInt vbs,bs; 5158 PetscBT bitmask=NULL; 5159 5160 PetscFunctionBegin; 5161 /* 5162 No need to setup local scatters if 5163 - primal space is unchanged 5164 AND 5165 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5166 AND 5167 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5168 */ 5169 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5170 PetscFunctionReturn(0); 5171 } 5172 /* destroy old objects */ 5173 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5174 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5175 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5176 /* Set Non-overlapping dimensions */ 5177 n_B = pcis->n_B; 5178 n_D = pcis->n - n_B; 5179 n_vertices = pcbddc->n_vertices; 5180 5181 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5182 5183 /* create auxiliary bitmask and allocate workspace */ 5184 if (!sub_schurs || !sub_schurs->reuse_solver) { 5185 PetscCall(PetscMalloc1(pcis->n-n_vertices,&idx_R_local)); 5186 PetscCall(PetscBTCreate(pcis->n,&bitmask)); 5187 for (i=0;i<n_vertices;i++) { 5188 PetscCall(PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i])); 5189 } 5190 5191 for (i=0, n_R=0; i<pcis->n; i++) { 5192 if (!PetscBTLookup(bitmask,i)) { 5193 idx_R_local[n_R++] = i; 5194 } 5195 } 5196 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5197 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5198 5199 PetscCall(ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5200 PetscCall(ISGetLocalSize(reuse_solver->is_R,&n_R)); 5201 } 5202 5203 /* Block code */ 5204 vbs = 1; 5205 PetscCall(MatGetBlockSize(pcbddc->local_mat,&bs)); 5206 if (bs>1 && !(n_vertices%bs)) { 5207 PetscBool is_blocked = PETSC_TRUE; 5208 PetscInt *vary; 5209 if (!sub_schurs || !sub_schurs->reuse_solver) { 5210 PetscCall(PetscMalloc1(pcis->n/bs,&vary)); 5211 PetscCall(PetscArrayzero(vary,pcis->n/bs)); 5212 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5213 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5214 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5215 for (i=0; i<pcis->n/bs; i++) { 5216 if (vary[i]!=0 && vary[i]!=bs) { 5217 is_blocked = PETSC_FALSE; 5218 break; 5219 } 5220 } 5221 PetscCall(PetscFree(vary)); 5222 } else { 5223 /* Verify directly the R set */ 5224 for (i=0; i<n_R/bs; i++) { 5225 PetscInt j,node=idx_R_local[bs*i]; 5226 for (j=1; j<bs; j++) { 5227 if (node != idx_R_local[bs*i+j]-j) { 5228 is_blocked = PETSC_FALSE; 5229 break; 5230 } 5231 } 5232 } 5233 } 5234 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5235 vbs = bs; 5236 for (i=0;i<n_R/vbs;i++) { 5237 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5238 } 5239 } 5240 } 5241 PetscCall(ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local)); 5242 if (sub_schurs && sub_schurs->reuse_solver) { 5243 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5244 5245 PetscCall(ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5246 PetscCall(ISDestroy(&reuse_solver->is_R)); 5247 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5248 reuse_solver->is_R = pcbddc->is_R_local; 5249 } else { 5250 PetscCall(PetscFree(idx_R_local)); 5251 } 5252 5253 /* print some info if requested */ 5254 if (pcbddc->dbg_flag) { 5255 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5256 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5257 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5258 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank)); 5259 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n",pcis->n,n_D,n_B)); 5260 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size)); 5261 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5262 } 5263 5264 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5265 if (!sub_schurs || !sub_schurs->reuse_solver) { 5266 IS is_aux1,is_aux2; 5267 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5268 5269 PetscCall(ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5270 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array1)); 5271 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array2)); 5272 PetscCall(ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5273 for (i=0; i<n_D; i++) { 5274 PetscCall(PetscBTSet(bitmask,is_indices[i])); 5275 } 5276 PetscCall(ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5277 for (i=0, j=0; i<n_R; i++) { 5278 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5279 aux_array1[j++] = i; 5280 } 5281 } 5282 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5283 PetscCall(ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5284 for (i=0, j=0; i<n_B; i++) { 5285 if (!PetscBTLookup(bitmask,is_indices[i])) { 5286 aux_array2[j++] = i; 5287 } 5288 } 5289 PetscCall(ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5290 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2)); 5291 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B)); 5292 PetscCall(ISDestroy(&is_aux1)); 5293 PetscCall(ISDestroy(&is_aux2)); 5294 5295 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5296 PetscCall(PetscMalloc1(n_D,&aux_array1)); 5297 for (i=0, j=0; i<n_R; i++) { 5298 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5299 aux_array1[j++] = i; 5300 } 5301 } 5302 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5303 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5304 PetscCall(ISDestroy(&is_aux1)); 5305 } 5306 PetscCall(PetscBTDestroy(&bitmask)); 5307 PetscCall(ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5308 } else { 5309 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5310 IS tis; 5311 PetscInt schur_size; 5312 5313 PetscCall(ISGetLocalSize(reuse_solver->is_B,&schur_size)); 5314 PetscCall(ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis)); 5315 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B)); 5316 PetscCall(ISDestroy(&tis)); 5317 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5318 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis)); 5319 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5320 PetscCall(ISDestroy(&tis)); 5321 } 5322 } 5323 PetscFunctionReturn(0); 5324 } 5325 5326 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5327 { 5328 MatNullSpace NullSpace; 5329 Mat dmat; 5330 const Vec *nullvecs; 5331 Vec v,v2,*nullvecs2; 5332 VecScatter sct = NULL; 5333 PetscContainer c; 5334 PetscScalar *ddata; 5335 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5336 PetscBool nnsp_has_cnst; 5337 5338 PetscFunctionBegin; 5339 if (!is && !B) { /* MATIS */ 5340 Mat_IS* matis = (Mat_IS*)A->data; 5341 5342 if (!B) { 5343 PetscCall(MatISGetLocalMat(A,&B)); 5344 } 5345 sct = matis->cctx; 5346 PetscCall(PetscObjectReference((PetscObject)sct)); 5347 } else { 5348 PetscCall(MatGetNullSpace(B,&NullSpace)); 5349 if (!NullSpace) { 5350 PetscCall(MatGetNearNullSpace(B,&NullSpace)); 5351 } 5352 if (NullSpace) PetscFunctionReturn(0); 5353 } 5354 PetscCall(MatGetNullSpace(A,&NullSpace)); 5355 if (!NullSpace) { 5356 PetscCall(MatGetNearNullSpace(A,&NullSpace)); 5357 } 5358 if (!NullSpace) PetscFunctionReturn(0); 5359 5360 PetscCall(MatCreateVecs(A,&v,NULL)); 5361 PetscCall(MatCreateVecs(B,&v2,NULL)); 5362 if (!sct) { 5363 PetscCall(VecScatterCreate(v,is,v2,NULL,&sct)); 5364 } 5365 PetscCall(MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs)); 5366 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5367 PetscCall(PetscMalloc1(bsiz,&nullvecs2)); 5368 PetscCall(VecGetBlockSize(v2,&bs)); 5369 PetscCall(VecGetSize(v2,&N)); 5370 PetscCall(VecGetLocalSize(v2,&n)); 5371 PetscCall(PetscMalloc1(n*bsiz,&ddata)); 5372 for (k=0;k<nnsp_size;k++) { 5373 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k])); 5374 PetscCall(VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5375 PetscCall(VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5376 } 5377 if (nnsp_has_cnst) { 5378 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size])); 5379 PetscCall(VecSet(nullvecs2[nnsp_size],1.0)); 5380 } 5381 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2)); 5382 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace)); 5383 5384 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat)); 5385 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B),&c)); 5386 PetscCall(PetscContainerSetPointer(c,ddata)); 5387 PetscCall(PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault)); 5388 PetscCall(PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c)); 5389 PetscCall(PetscContainerDestroy(&c)); 5390 PetscCall(PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat)); 5391 PetscCall(MatDestroy(&dmat)); 5392 5393 for (k=0;k<bsiz;k++) { 5394 PetscCall(VecDestroy(&nullvecs2[k])); 5395 } 5396 PetscCall(PetscFree(nullvecs2)); 5397 PetscCall(MatSetNearNullSpace(B,NullSpace)); 5398 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5399 PetscCall(VecDestroy(&v)); 5400 PetscCall(VecDestroy(&v2)); 5401 PetscCall(VecScatterDestroy(&sct)); 5402 PetscFunctionReturn(0); 5403 } 5404 5405 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5406 { 5407 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5408 PC_IS *pcis = (PC_IS*)pc->data; 5409 PC pc_temp; 5410 Mat A_RR; 5411 MatNullSpace nnsp; 5412 MatReuse reuse; 5413 PetscScalar m_one = -1.0; 5414 PetscReal value; 5415 PetscInt n_D,n_R; 5416 PetscBool issbaij,opts; 5417 void (*f)(void) = NULL; 5418 char dir_prefix[256],neu_prefix[256],str_level[16]; 5419 size_t len; 5420 5421 PetscFunctionBegin; 5422 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5423 /* approximate solver, propagate NearNullSpace if needed */ 5424 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5425 MatNullSpace gnnsp1,gnnsp2; 5426 PetscBool lhas,ghas; 5427 5428 PetscCall(MatGetNearNullSpace(pcbddc->local_mat,&nnsp)); 5429 PetscCall(MatGetNearNullSpace(pc->pmat,&gnnsp1)); 5430 PetscCall(MatGetNullSpace(pc->pmat,&gnnsp2)); 5431 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5432 PetscCall(MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 5433 if (!ghas && (gnnsp1 || gnnsp2)) { 5434 PetscCall(MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL)); 5435 } 5436 } 5437 5438 /* compute prefixes */ 5439 PetscCall(PetscStrcpy(dir_prefix,"")); 5440 PetscCall(PetscStrcpy(neu_prefix,"")); 5441 if (!pcbddc->current_level) { 5442 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix))); 5443 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix))); 5444 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5445 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5446 } else { 5447 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 5448 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 5449 len -= 15; /* remove "pc_bddc_coarse_" */ 5450 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5451 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5452 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5453 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1)); 5454 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1)); 5455 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5456 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5457 PetscCall(PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix))); 5458 PetscCall(PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix))); 5459 } 5460 5461 /* DIRICHLET PROBLEM */ 5462 if (dirichlet) { 5463 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5464 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5465 PetscCheck(sub_schurs && sub_schurs->reuse_solver,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5466 if (pcbddc->dbg_flag) { 5467 Mat A_IIn; 5468 5469 PetscCall(PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn)); 5470 PetscCall(MatDestroy(&pcis->A_II)); 5471 pcis->A_II = A_IIn; 5472 } 5473 } 5474 if (pcbddc->local_mat->symmetric_set) { 5475 PetscCall(MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric)); 5476 } 5477 /* Matrix for Dirichlet problem is pcis->A_II */ 5478 n_D = pcis->n - pcis->n_B; 5479 opts = PETSC_FALSE; 5480 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5481 opts = PETSC_TRUE; 5482 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D)); 5483 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1)); 5484 /* default */ 5485 PetscCall(KSPSetType(pcbddc->ksp_D,KSPPREONLY)); 5486 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix)); 5487 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij)); 5488 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5489 if (issbaij) { 5490 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5491 } else { 5492 PetscCall(PCSetType(pc_temp,PCLU)); 5493 } 5494 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure)); 5495 } 5496 PetscCall(MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix)); 5497 PetscCall(KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II)); 5498 /* Allow user's customization */ 5499 if (opts) { 5500 PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5501 } 5502 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5503 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5504 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II)); 5505 } 5506 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5507 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5508 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5509 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5510 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5511 const PetscInt *idxs; 5512 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5513 5514 PetscCall(ISGetLocalSize(pcis->is_I_local,&nl)); 5515 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 5516 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5517 for (i=0;i<nl;i++) { 5518 for (d=0;d<cdim;d++) { 5519 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5520 } 5521 } 5522 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 5523 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5524 PetscCall(PetscFree(scoords)); 5525 } 5526 if (sub_schurs && sub_schurs->reuse_solver) { 5527 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5528 5529 PetscCall(KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver)); 5530 } 5531 5532 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5533 if (!n_D) { 5534 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5535 PetscCall(PCSetType(pc_temp,PCNONE)); 5536 } 5537 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5538 /* set ksp_D into pcis data */ 5539 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5540 PetscCall(KSPDestroy(&pcis->ksp_D)); 5541 pcis->ksp_D = pcbddc->ksp_D; 5542 } 5543 5544 /* NEUMANN PROBLEM */ 5545 A_RR = NULL; 5546 if (neumann) { 5547 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5548 PetscInt ibs,mbs; 5549 PetscBool issbaij, reuse_neumann_solver; 5550 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5551 5552 reuse_neumann_solver = PETSC_FALSE; 5553 if (sub_schurs && sub_schurs->reuse_solver) { 5554 IS iP; 5555 5556 reuse_neumann_solver = PETSC_TRUE; 5557 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP)); 5558 if (iP) reuse_neumann_solver = PETSC_FALSE; 5559 } 5560 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5561 PetscCall(ISGetSize(pcbddc->is_R_local,&n_R)); 5562 if (pcbddc->ksp_R) { /* already created ksp */ 5563 PetscInt nn_R; 5564 PetscCall(KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR)); 5565 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5566 PetscCall(MatGetSize(A_RR,&nn_R,NULL)); 5567 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5568 PetscCall(KSPReset(pcbddc->ksp_R)); 5569 PetscCall(MatDestroy(&A_RR)); 5570 reuse = MAT_INITIAL_MATRIX; 5571 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5572 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5573 PetscCall(MatDestroy(&A_RR)); 5574 reuse = MAT_INITIAL_MATRIX; 5575 } else { /* safe to reuse the matrix */ 5576 reuse = MAT_REUSE_MATRIX; 5577 } 5578 } 5579 /* last check */ 5580 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5581 PetscCall(MatDestroy(&A_RR)); 5582 reuse = MAT_INITIAL_MATRIX; 5583 } 5584 } else { /* first time, so we need to create the matrix */ 5585 reuse = MAT_INITIAL_MATRIX; 5586 } 5587 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5588 TODO: Get Rid of these conversions */ 5589 PetscCall(MatGetBlockSize(pcbddc->local_mat,&mbs)); 5590 PetscCall(ISGetBlockSize(pcbddc->is_R_local,&ibs)); 5591 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij)); 5592 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5593 if (matis->A == pcbddc->local_mat) { 5594 PetscCall(MatDestroy(&pcbddc->local_mat)); 5595 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5596 } else { 5597 PetscCall(MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5598 } 5599 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5600 if (matis->A == pcbddc->local_mat) { 5601 PetscCall(MatDestroy(&pcbddc->local_mat)); 5602 PetscCall(MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5603 } else { 5604 PetscCall(MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5605 } 5606 } 5607 /* extract A_RR */ 5608 if (reuse_neumann_solver) { 5609 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5610 5611 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5612 PetscCall(MatDestroy(&A_RR)); 5613 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5614 PetscCall(PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR)); 5615 } else { 5616 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR)); 5617 } 5618 } else { 5619 PetscCall(MatDestroy(&A_RR)); 5620 PetscCall(PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL)); 5621 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5622 } 5623 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5624 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR)); 5625 } 5626 if (pcbddc->local_mat->symmetric_set) { 5627 PetscCall(MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric)); 5628 } 5629 opts = PETSC_FALSE; 5630 if (!pcbddc->ksp_R) { /* create object if not present */ 5631 opts = PETSC_TRUE; 5632 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R)); 5633 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1)); 5634 /* default */ 5635 PetscCall(KSPSetType(pcbddc->ksp_R,KSPPREONLY)); 5636 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix)); 5637 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5638 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij)); 5639 if (issbaij) { 5640 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5641 } else { 5642 PetscCall(PCSetType(pc_temp,PCLU)); 5643 } 5644 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure)); 5645 } 5646 PetscCall(KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR)); 5647 PetscCall(MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix)); 5648 if (opts) { /* Allow user's customization once */ 5649 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5650 } 5651 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5652 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5653 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR)); 5654 } 5655 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5656 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5657 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5658 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5659 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5660 const PetscInt *idxs; 5661 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5662 5663 PetscCall(ISGetLocalSize(pcbddc->is_R_local,&nl)); 5664 PetscCall(ISGetIndices(pcbddc->is_R_local,&idxs)); 5665 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5666 for (i=0;i<nl;i++) { 5667 for (d=0;d<cdim;d++) { 5668 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5669 } 5670 } 5671 PetscCall(ISRestoreIndices(pcbddc->is_R_local,&idxs)); 5672 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5673 PetscCall(PetscFree(scoords)); 5674 } 5675 5676 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5677 if (!n_R) { 5678 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5679 PetscCall(PCSetType(pc_temp,PCNONE)); 5680 } 5681 /* Reuse solver if it is present */ 5682 if (reuse_neumann_solver) { 5683 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5684 5685 PetscCall(KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver)); 5686 } 5687 PetscCall(KSPSetUp(pcbddc->ksp_R)); 5688 } 5689 5690 if (pcbddc->dbg_flag) { 5691 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5692 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5693 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5694 } 5695 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5696 5697 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5698 if (pcbddc->NullSpace_corr[0]) { 5699 PetscCall(PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE)); 5700 } 5701 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5702 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1])); 5703 } 5704 if (neumann && pcbddc->NullSpace_corr[2]) { 5705 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3])); 5706 } 5707 /* check Dirichlet and Neumann solvers */ 5708 if (pcbddc->dbg_flag) { 5709 if (dirichlet) { /* Dirichlet */ 5710 PetscCall(VecSetRandom(pcis->vec1_D,NULL)); 5711 PetscCall(MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D)); 5712 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D)); 5713 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 5714 PetscCall(VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D)); 5715 PetscCall(VecNorm(pcis->vec1_D,NORM_INFINITY,&value)); 5716 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,(double)value)); 5717 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5718 } 5719 if (neumann) { /* Neumann */ 5720 PetscCall(VecSetRandom(pcbddc->vec1_R,NULL)); 5721 PetscCall(MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R)); 5722 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R)); 5723 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 5724 PetscCall(VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R)); 5725 PetscCall(VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value)); 5726 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,(double)value)); 5727 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5728 } 5729 } 5730 /* free Neumann problem's matrix */ 5731 PetscCall(MatDestroy(&A_RR)); 5732 PetscFunctionReturn(0); 5733 } 5734 5735 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5736 { 5737 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5738 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5739 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5740 5741 PetscFunctionBegin; 5742 if (!reuse_solver) { 5743 PetscCall(VecSet(pcbddc->vec1_R,0.)); 5744 } 5745 if (!pcbddc->switch_static) { 5746 if (applytranspose && pcbddc->local_auxmat1) { 5747 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C)); 5748 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5749 } 5750 if (!reuse_solver) { 5751 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5752 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5753 } else { 5754 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5755 5756 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5757 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5758 } 5759 } else { 5760 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5761 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5762 PetscCall(VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5763 PetscCall(VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5764 if (applytranspose && pcbddc->local_auxmat1) { 5765 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C)); 5766 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5767 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5768 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5769 } 5770 } 5771 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5772 if (!reuse_solver || pcbddc->switch_static) { 5773 if (applytranspose) { 5774 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5775 } else { 5776 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5777 } 5778 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R)); 5779 } else { 5780 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5781 5782 if (applytranspose) { 5783 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5784 } else { 5785 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5786 } 5787 } 5788 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5789 PetscCall(VecSet(inout_B,0.)); 5790 if (!pcbddc->switch_static) { 5791 if (!reuse_solver) { 5792 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5793 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5794 } else { 5795 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5796 5797 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5798 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5799 } 5800 if (!applytranspose && pcbddc->local_auxmat1) { 5801 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5802 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B)); 5803 } 5804 } else { 5805 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5806 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5807 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5808 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5809 if (!applytranspose && pcbddc->local_auxmat1) { 5810 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5811 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R)); 5812 } 5813 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5814 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5815 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5816 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5817 } 5818 PetscFunctionReturn(0); 5819 } 5820 5821 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5822 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5823 { 5824 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5825 PC_IS* pcis = (PC_IS*) (pc->data); 5826 const PetscScalar zero = 0.0; 5827 5828 PetscFunctionBegin; 5829 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5830 if (!pcbddc->benign_apply_coarse_only) { 5831 if (applytranspose) { 5832 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P)); 5833 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5834 } else { 5835 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P)); 5836 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5837 } 5838 } else { 5839 PetscCall(VecSet(pcbddc->vec1_P,zero)); 5840 } 5841 5842 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5843 if (pcbddc->benign_n) { 5844 PetscScalar *array; 5845 PetscInt j; 5846 5847 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5848 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5849 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5850 } 5851 5852 /* start communications from local primal nodes to rhs of coarse solver */ 5853 PetscCall(VecSet(pcbddc->coarse_vec,zero)); 5854 PetscCall(PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD)); 5855 PetscCall(PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD)); 5856 5857 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5858 if (pcbddc->coarse_ksp) { 5859 Mat coarse_mat; 5860 Vec rhs,sol; 5861 MatNullSpace nullsp; 5862 PetscBool isbddc = PETSC_FALSE; 5863 5864 if (pcbddc->benign_have_null) { 5865 PC coarse_pc; 5866 5867 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5868 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 5869 /* we need to propagate to coarser levels the need for a possible benign correction */ 5870 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5871 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5872 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5873 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5874 } 5875 } 5876 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&rhs)); 5877 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&sol)); 5878 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 5879 if (applytranspose) { 5880 PetscCheck(!pcbddc->benign_apply_coarse_only,PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5881 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5882 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol)); 5883 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5884 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5885 PetscCall(MatGetTransposeNullSpace(coarse_mat,&nullsp)); 5886 if (nullsp) { 5887 PetscCall(MatNullSpaceRemove(nullsp,sol)); 5888 } 5889 } else { 5890 PetscCall(MatGetNullSpace(coarse_mat,&nullsp)); 5891 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5892 PC coarse_pc; 5893 5894 if (nullsp) { 5895 PetscCall(MatNullSpaceRemove(nullsp,rhs)); 5896 } 5897 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5898 PetscCall(PCPreSolve(coarse_pc,pcbddc->coarse_ksp)); 5899 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol)); 5900 PetscCall(PCPostSolve(coarse_pc,pcbddc->coarse_ksp)); 5901 } else { 5902 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5903 PetscCall(KSPSolve(pcbddc->coarse_ksp,rhs,sol)); 5904 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5905 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5906 if (nullsp) { 5907 PetscCall(MatNullSpaceRemove(nullsp,sol)); 5908 } 5909 } 5910 } 5911 /* we don't need the benign correction at coarser levels anymore */ 5912 if (pcbddc->benign_have_null && isbddc) { 5913 PC coarse_pc; 5914 PC_BDDC* coarsepcbddc; 5915 5916 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5917 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5918 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5919 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5920 } 5921 } 5922 5923 /* Local solution on R nodes */ 5924 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5925 PetscCall(PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose)); 5926 } 5927 /* communications from coarse sol to local primal nodes */ 5928 PetscCall(PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE)); 5929 PetscCall(PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE)); 5930 5931 /* Sum contributions from the two levels */ 5932 if (!pcbddc->benign_apply_coarse_only) { 5933 if (applytranspose) { 5934 PetscCall(MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5935 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5936 } else { 5937 PetscCall(MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5938 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5939 } 5940 /* store p0 */ 5941 if (pcbddc->benign_n) { 5942 PetscScalar *array; 5943 PetscInt j; 5944 5945 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5946 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5947 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5948 } 5949 } else { /* expand the coarse solution */ 5950 if (applytranspose) { 5951 PetscCall(MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B)); 5952 } else { 5953 PetscCall(MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B)); 5954 } 5955 } 5956 PetscFunctionReturn(0); 5957 } 5958 5959 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5960 { 5961 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5962 Vec from,to; 5963 const PetscScalar *array; 5964 5965 PetscFunctionBegin; 5966 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5967 from = pcbddc->coarse_vec; 5968 to = pcbddc->vec1_P; 5969 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5970 Vec tvec; 5971 5972 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 5973 PetscCall(VecResetArray(tvec)); 5974 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&tvec)); 5975 PetscCall(VecGetArrayRead(tvec,&array)); 5976 PetscCall(VecPlaceArray(from,array)); 5977 PetscCall(VecRestoreArrayRead(tvec,&array)); 5978 } 5979 } else { /* from local to global -> put data in coarse right hand side */ 5980 from = pcbddc->vec1_P; 5981 to = pcbddc->coarse_vec; 5982 } 5983 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 5984 PetscFunctionReturn(0); 5985 } 5986 5987 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5988 { 5989 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5990 Vec from,to; 5991 const PetscScalar *array; 5992 5993 PetscFunctionBegin; 5994 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5995 from = pcbddc->coarse_vec; 5996 to = pcbddc->vec1_P; 5997 } else { /* from local to global -> put data in coarse right hand side */ 5998 from = pcbddc->vec1_P; 5999 to = pcbddc->coarse_vec; 6000 } 6001 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 6002 if (smode == SCATTER_FORWARD) { 6003 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6004 Vec tvec; 6005 6006 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 6007 PetscCall(VecGetArrayRead(to,&array)); 6008 PetscCall(VecPlaceArray(tvec,array)); 6009 PetscCall(VecRestoreArrayRead(to,&array)); 6010 } 6011 } else { 6012 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6013 PetscCall(VecResetArray(from)); 6014 } 6015 } 6016 PetscFunctionReturn(0); 6017 } 6018 6019 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6020 { 6021 PC_IS* pcis = (PC_IS*)(pc->data); 6022 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6023 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6024 /* one and zero */ 6025 PetscScalar one=1.0,zero=0.0; 6026 /* space to store constraints and their local indices */ 6027 PetscScalar *constraints_data; 6028 PetscInt *constraints_idxs,*constraints_idxs_B; 6029 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6030 PetscInt *constraints_n; 6031 /* iterators */ 6032 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6033 /* BLAS integers */ 6034 PetscBLASInt lwork,lierr; 6035 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6036 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6037 /* reuse */ 6038 PetscInt olocal_primal_size,olocal_primal_size_cc; 6039 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6040 /* change of basis */ 6041 PetscBool qr_needed; 6042 PetscBT change_basis,qr_needed_idx; 6043 /* auxiliary stuff */ 6044 PetscInt *nnz,*is_indices; 6045 PetscInt ncc; 6046 /* some quantities */ 6047 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6048 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6049 PetscReal tol; /* tolerance for retaining eigenmodes */ 6050 6051 PetscFunctionBegin; 6052 tol = PetscSqrtReal(PETSC_SMALL); 6053 /* Destroy Mat objects computed previously */ 6054 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6055 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6056 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6057 /* save info on constraints from previous setup (if any) */ 6058 olocal_primal_size = pcbddc->local_primal_size; 6059 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6060 PetscCall(PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult)); 6061 PetscCall(PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc)); 6062 PetscCall(PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc)); 6063 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 6064 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6065 6066 if (!pcbddc->adaptive_selection) { 6067 IS ISForVertices,*ISForFaces,*ISForEdges; 6068 MatNullSpace nearnullsp; 6069 const Vec *nearnullvecs; 6070 Vec *localnearnullsp; 6071 PetscScalar *array; 6072 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 6073 PetscBool nnsp_has_cnst; 6074 /* LAPACK working arrays for SVD or POD */ 6075 PetscBool skip_lapack,boolforchange; 6076 PetscScalar *work; 6077 PetscReal *singular_vals; 6078 #if defined(PETSC_USE_COMPLEX) 6079 PetscReal *rwork; 6080 #endif 6081 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6082 PetscBLASInt dummy_int=1; 6083 PetscScalar dummy_scalar=1.; 6084 PetscBool use_pod = PETSC_FALSE; 6085 6086 /* MKL SVD with same input gives different results on different processes! */ 6087 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6088 use_pod = PETSC_TRUE; 6089 #endif 6090 /* Get index sets for faces, edges and vertices from graph */ 6091 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices)); 6092 /* print some info */ 6093 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6094 PetscInt nv; 6095 6096 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 6097 PetscCall(ISGetSize(ISForVertices,&nv)); 6098 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6099 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6100 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices)); 6101 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges)); 6102 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces)); 6103 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6104 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6105 } 6106 6107 /* free unneeded index sets */ 6108 if (!pcbddc->use_vertices) { 6109 PetscCall(ISDestroy(&ISForVertices)); 6110 } 6111 if (!pcbddc->use_edges) { 6112 for (i=0;i<n_ISForEdges;i++) { 6113 PetscCall(ISDestroy(&ISForEdges[i])); 6114 } 6115 PetscCall(PetscFree(ISForEdges)); 6116 n_ISForEdges = 0; 6117 } 6118 if (!pcbddc->use_faces) { 6119 for (i=0;i<n_ISForFaces;i++) { 6120 PetscCall(ISDestroy(&ISForFaces[i])); 6121 } 6122 PetscCall(PetscFree(ISForFaces)); 6123 n_ISForFaces = 0; 6124 } 6125 6126 /* check if near null space is attached to global mat */ 6127 if (pcbddc->use_nnsp) { 6128 PetscCall(MatGetNearNullSpace(pc->pmat,&nearnullsp)); 6129 } else nearnullsp = NULL; 6130 6131 if (nearnullsp) { 6132 PetscCall(MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs)); 6133 /* remove any stored info */ 6134 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6135 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6136 /* store information for BDDC solver reuse */ 6137 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6138 pcbddc->onearnullspace = nearnullsp; 6139 PetscCall(PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state)); 6140 for (i=0;i<nnsp_size;i++) { 6141 PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i])); 6142 } 6143 } else { /* if near null space is not provided BDDC uses constants by default */ 6144 nnsp_size = 0; 6145 nnsp_has_cnst = PETSC_TRUE; 6146 } 6147 /* get max number of constraints on a single cc */ 6148 max_constraints = nnsp_size; 6149 if (nnsp_has_cnst) max_constraints++; 6150 6151 /* 6152 Evaluate maximum storage size needed by the procedure 6153 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6154 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6155 There can be multiple constraints per connected component 6156 */ 6157 n_vertices = 0; 6158 if (ISForVertices) { 6159 PetscCall(ISGetSize(ISForVertices,&n_vertices)); 6160 } 6161 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6162 PetscCall(PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n)); 6163 6164 total_counts = n_ISForFaces+n_ISForEdges; 6165 total_counts *= max_constraints; 6166 total_counts += n_vertices; 6167 PetscCall(PetscBTCreate(total_counts,&change_basis)); 6168 6169 total_counts = 0; 6170 max_size_of_constraint = 0; 6171 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6172 IS used_is; 6173 if (i<n_ISForEdges) { 6174 used_is = ISForEdges[i]; 6175 } else { 6176 used_is = ISForFaces[i-n_ISForEdges]; 6177 } 6178 PetscCall(ISGetSize(used_is,&j)); 6179 total_counts += j; 6180 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6181 } 6182 PetscCall(PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B)); 6183 6184 /* get local part of global near null space vectors */ 6185 PetscCall(PetscMalloc1(nnsp_size,&localnearnullsp)); 6186 for (k=0;k<nnsp_size;k++) { 6187 PetscCall(VecDuplicate(pcis->vec1_N,&localnearnullsp[k])); 6188 PetscCall(VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6189 PetscCall(VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6190 } 6191 6192 /* whether or not to skip lapack calls */ 6193 skip_lapack = PETSC_TRUE; 6194 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6195 6196 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6197 if (!skip_lapack) { 6198 PetscScalar temp_work; 6199 6200 if (use_pod) { 6201 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6202 PetscCall(PetscMalloc1(max_constraints*max_constraints,&correlation_mat)); 6203 PetscCall(PetscMalloc1(max_constraints,&singular_vals)); 6204 PetscCall(PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis)); 6205 #if defined(PETSC_USE_COMPLEX) 6206 PetscCall(PetscMalloc1(3*max_constraints,&rwork)); 6207 #endif 6208 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6209 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6210 PetscCall(PetscBLASIntCast(max_constraints,&Blas_LDA)); 6211 lwork = -1; 6212 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6213 #if !defined(PETSC_USE_COMPLEX) 6214 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6215 #else 6216 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6217 #endif 6218 PetscCall(PetscFPTrapPop()); 6219 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6220 } else { 6221 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6222 /* SVD */ 6223 PetscInt max_n,min_n; 6224 max_n = max_size_of_constraint; 6225 min_n = max_constraints; 6226 if (max_size_of_constraint < max_constraints) { 6227 min_n = max_size_of_constraint; 6228 max_n = max_constraints; 6229 } 6230 PetscCall(PetscMalloc1(min_n,&singular_vals)); 6231 #if defined(PETSC_USE_COMPLEX) 6232 PetscCall(PetscMalloc1(5*min_n,&rwork)); 6233 #endif 6234 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6235 lwork = -1; 6236 PetscCall(PetscBLASIntCast(max_n,&Blas_M)); 6237 PetscCall(PetscBLASIntCast(min_n,&Blas_N)); 6238 PetscCall(PetscBLASIntCast(max_n,&Blas_LDA)); 6239 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6240 #if !defined(PETSC_USE_COMPLEX) 6241 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6242 #else 6243 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6244 #endif 6245 PetscCall(PetscFPTrapPop()); 6246 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6247 #else 6248 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6249 #endif /* on missing GESVD */ 6250 } 6251 /* Allocate optimal workspace */ 6252 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork)); 6253 PetscCall(PetscMalloc1(lwork,&work)); 6254 } 6255 /* Now we can loop on constraining sets */ 6256 total_counts = 0; 6257 constraints_idxs_ptr[0] = 0; 6258 constraints_data_ptr[0] = 0; 6259 /* vertices */ 6260 if (n_vertices) { 6261 PetscCall(ISGetIndices(ISForVertices,(const PetscInt**)&is_indices)); 6262 PetscCall(PetscArraycpy(constraints_idxs,is_indices,n_vertices)); 6263 for (i=0;i<n_vertices;i++) { 6264 constraints_n[total_counts] = 1; 6265 constraints_data[total_counts] = 1.0; 6266 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6267 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6268 total_counts++; 6269 } 6270 PetscCall(ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices)); 6271 n_vertices = total_counts; 6272 } 6273 6274 /* edges and faces */ 6275 total_counts_cc = total_counts; 6276 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6277 IS used_is; 6278 PetscBool idxs_copied = PETSC_FALSE; 6279 6280 if (ncc<n_ISForEdges) { 6281 used_is = ISForEdges[ncc]; 6282 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6283 } else { 6284 used_is = ISForFaces[ncc-n_ISForEdges]; 6285 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6286 } 6287 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6288 6289 PetscCall(ISGetSize(used_is,&size_of_constraint)); 6290 PetscCall(ISGetIndices(used_is,(const PetscInt**)&is_indices)); 6291 /* change of basis should not be performed on local periodic nodes */ 6292 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6293 if (nnsp_has_cnst) { 6294 PetscScalar quad_value; 6295 6296 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6297 idxs_copied = PETSC_TRUE; 6298 6299 if (!pcbddc->use_nnsp_true) { 6300 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6301 } else { 6302 quad_value = 1.0; 6303 } 6304 for (j=0;j<size_of_constraint;j++) { 6305 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6306 } 6307 temp_constraints++; 6308 total_counts++; 6309 } 6310 for (k=0;k<nnsp_size;k++) { 6311 PetscReal real_value; 6312 PetscScalar *ptr_to_data; 6313 6314 PetscCall(VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6315 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6316 for (j=0;j<size_of_constraint;j++) { 6317 ptr_to_data[j] = array[is_indices[j]]; 6318 } 6319 PetscCall(VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6320 /* check if array is null on the connected component */ 6321 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6322 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6323 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6324 temp_constraints++; 6325 total_counts++; 6326 if (!idxs_copied) { 6327 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6328 idxs_copied = PETSC_TRUE; 6329 } 6330 } 6331 } 6332 PetscCall(ISRestoreIndices(used_is,(const PetscInt**)&is_indices)); 6333 valid_constraints = temp_constraints; 6334 if (!pcbddc->use_nnsp_true && temp_constraints) { 6335 if (temp_constraints == 1) { /* just normalize the constraint */ 6336 PetscScalar norm,*ptr_to_data; 6337 6338 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6339 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6340 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6341 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6342 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6343 } else { /* perform SVD */ 6344 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6345 6346 if (use_pod) { 6347 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6348 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6349 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6350 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6351 from that computed using LAPACKgesvd 6352 -> This is due to a different computation of eigenvectors in LAPACKheev 6353 -> The quality of the POD-computed basis will be the same */ 6354 PetscCall(PetscArrayzero(correlation_mat,temp_constraints*temp_constraints)); 6355 /* Store upper triangular part of correlation matrix */ 6356 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6357 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6358 for (j=0;j<temp_constraints;j++) { 6359 for (k=0;k<j+1;k++) { 6360 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6361 } 6362 } 6363 /* compute eigenvalues and eigenvectors of correlation matrix */ 6364 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6365 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDA)); 6366 #if !defined(PETSC_USE_COMPLEX) 6367 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6368 #else 6369 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6370 #endif 6371 PetscCall(PetscFPTrapPop()); 6372 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6373 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6374 j = 0; 6375 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6376 total_counts = total_counts-j; 6377 valid_constraints = temp_constraints-j; 6378 /* scale and copy POD basis into used quadrature memory */ 6379 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6380 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6381 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_K)); 6382 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6383 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDB)); 6384 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6385 if (j<temp_constraints) { 6386 PetscInt ii; 6387 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6388 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6389 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6390 PetscCall(PetscFPTrapPop()); 6391 for (k=0;k<temp_constraints-j;k++) { 6392 for (ii=0;ii<size_of_constraint;ii++) { 6393 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6394 } 6395 } 6396 } 6397 } else { 6398 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6399 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6400 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6401 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6402 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6403 #if !defined(PETSC_USE_COMPLEX) 6404 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6405 #else 6406 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6407 #endif 6408 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6409 PetscCall(PetscFPTrapPop()); 6410 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6411 k = temp_constraints; 6412 if (k > size_of_constraint) k = size_of_constraint; 6413 j = 0; 6414 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6415 valid_constraints = k-j; 6416 total_counts = total_counts-temp_constraints+valid_constraints; 6417 #else 6418 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6419 #endif /* on missing GESVD */ 6420 } 6421 } 6422 } 6423 /* update pointers information */ 6424 if (valid_constraints) { 6425 constraints_n[total_counts_cc] = valid_constraints; 6426 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6427 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6428 /* set change_of_basis flag */ 6429 if (boolforchange) { 6430 PetscBTSet(change_basis,total_counts_cc); 6431 } 6432 total_counts_cc++; 6433 } 6434 } 6435 /* free workspace */ 6436 if (!skip_lapack) { 6437 PetscCall(PetscFree(work)); 6438 #if defined(PETSC_USE_COMPLEX) 6439 PetscCall(PetscFree(rwork)); 6440 #endif 6441 PetscCall(PetscFree(singular_vals)); 6442 PetscCall(PetscFree(correlation_mat)); 6443 PetscCall(PetscFree(temp_basis)); 6444 } 6445 for (k=0;k<nnsp_size;k++) { 6446 PetscCall(VecDestroy(&localnearnullsp[k])); 6447 } 6448 PetscCall(PetscFree(localnearnullsp)); 6449 /* free index sets of faces, edges and vertices */ 6450 for (i=0;i<n_ISForFaces;i++) { 6451 PetscCall(ISDestroy(&ISForFaces[i])); 6452 } 6453 if (n_ISForFaces) { 6454 PetscCall(PetscFree(ISForFaces)); 6455 } 6456 for (i=0;i<n_ISForEdges;i++) { 6457 PetscCall(ISDestroy(&ISForEdges[i])); 6458 } 6459 if (n_ISForEdges) { 6460 PetscCall(PetscFree(ISForEdges)); 6461 } 6462 PetscCall(ISDestroy(&ISForVertices)); 6463 } else { 6464 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6465 6466 total_counts = 0; 6467 n_vertices = 0; 6468 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6469 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 6470 } 6471 max_constraints = 0; 6472 total_counts_cc = 0; 6473 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6474 total_counts += pcbddc->adaptive_constraints_n[i]; 6475 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6476 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6477 } 6478 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6479 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6480 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6481 constraints_data = pcbddc->adaptive_constraints_data; 6482 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6483 PetscCall(PetscMalloc1(total_counts_cc,&constraints_n)); 6484 total_counts_cc = 0; 6485 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6486 if (pcbddc->adaptive_constraints_n[i]) { 6487 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6488 } 6489 } 6490 6491 max_size_of_constraint = 0; 6492 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6493 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B)); 6494 /* Change of basis */ 6495 PetscCall(PetscBTCreate(total_counts_cc,&change_basis)); 6496 if (pcbddc->use_change_of_basis) { 6497 for (i=0;i<sub_schurs->n_subs;i++) { 6498 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6499 PetscCall(PetscBTSet(change_basis,i+n_vertices)); 6500 } 6501 } 6502 } 6503 } 6504 pcbddc->local_primal_size = total_counts; 6505 PetscCall(PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs)); 6506 6507 /* map constraints_idxs in boundary numbering */ 6508 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B)); 6509 PetscCheck(i == constraints_idxs_ptr[total_counts_cc],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT,constraints_idxs_ptr[total_counts_cc],i); 6510 6511 /* Create constraint matrix */ 6512 PetscCall(MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix)); 6513 PetscCall(MatSetType(pcbddc->ConstraintMatrix,MATAIJ)); 6514 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n)); 6515 6516 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6517 /* determine if a QR strategy is needed for change of basis */ 6518 qr_needed = pcbddc->use_qr_single; 6519 PetscCall(PetscBTCreate(total_counts_cc,&qr_needed_idx)); 6520 total_primal_vertices=0; 6521 pcbddc->local_primal_size_cc = 0; 6522 for (i=0;i<total_counts_cc;i++) { 6523 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6524 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6525 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6526 pcbddc->local_primal_size_cc += 1; 6527 } else if (PetscBTLookup(change_basis,i)) { 6528 for (k=0;k<constraints_n[i];k++) { 6529 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6530 } 6531 pcbddc->local_primal_size_cc += constraints_n[i]; 6532 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6533 PetscBTSet(qr_needed_idx,i); 6534 qr_needed = PETSC_TRUE; 6535 } 6536 } else { 6537 pcbddc->local_primal_size_cc += 1; 6538 } 6539 } 6540 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6541 pcbddc->n_vertices = total_primal_vertices; 6542 /* permute indices in order to have a sorted set of vertices */ 6543 PetscCall(PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs)); 6544 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult)); 6545 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices)); 6546 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6547 6548 /* nonzero structure of constraint matrix */ 6549 /* and get reference dof for local constraints */ 6550 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&nnz)); 6551 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6552 6553 j = total_primal_vertices; 6554 total_counts = total_primal_vertices; 6555 cum = total_primal_vertices; 6556 for (i=n_vertices;i<total_counts_cc;i++) { 6557 if (!PetscBTLookup(change_basis,i)) { 6558 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6559 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6560 cum++; 6561 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6562 for (k=0;k<constraints_n[i];k++) { 6563 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6564 nnz[j+k] = size_of_constraint; 6565 } 6566 j += constraints_n[i]; 6567 } 6568 } 6569 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz)); 6570 PetscCall(MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6571 PetscCall(PetscFree(nnz)); 6572 6573 /* set values in constraint matrix */ 6574 for (i=0;i<total_primal_vertices;i++) { 6575 PetscCall(MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES)); 6576 } 6577 total_counts = total_primal_vertices; 6578 for (i=n_vertices;i<total_counts_cc;i++) { 6579 if (!PetscBTLookup(change_basis,i)) { 6580 PetscInt *cols; 6581 6582 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6583 cols = constraints_idxs+constraints_idxs_ptr[i]; 6584 for (k=0;k<constraints_n[i];k++) { 6585 PetscInt row = total_counts+k; 6586 PetscScalar *vals; 6587 6588 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6589 PetscCall(MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES)); 6590 } 6591 total_counts += constraints_n[i]; 6592 } 6593 } 6594 /* assembling */ 6595 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6596 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6597 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view")); 6598 6599 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6600 if (pcbddc->use_change_of_basis) { 6601 /* dual and primal dofs on a single cc */ 6602 PetscInt dual_dofs,primal_dofs; 6603 /* working stuff for GEQRF */ 6604 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6605 PetscBLASInt lqr_work; 6606 /* working stuff for UNGQR */ 6607 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6608 PetscBLASInt lgqr_work; 6609 /* working stuff for TRTRS */ 6610 PetscScalar *trs_rhs = NULL; 6611 PetscBLASInt Blas_NRHS; 6612 /* pointers for values insertion into change of basis matrix */ 6613 PetscInt *start_rows,*start_cols; 6614 PetscScalar *start_vals; 6615 /* working stuff for values insertion */ 6616 PetscBT is_primal; 6617 PetscInt *aux_primal_numbering_B; 6618 /* matrix sizes */ 6619 PetscInt global_size,local_size; 6620 /* temporary change of basis */ 6621 Mat localChangeOfBasisMatrix; 6622 /* extra space for debugging */ 6623 PetscScalar *dbg_work = NULL; 6624 6625 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6626 PetscCall(MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix)); 6627 PetscCall(MatSetType(localChangeOfBasisMatrix,MATAIJ)); 6628 PetscCall(MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n)); 6629 /* nonzeros for local mat */ 6630 PetscCall(PetscMalloc1(pcis->n,&nnz)); 6631 if (!pcbddc->benign_change || pcbddc->fake_change) { 6632 for (i=0;i<pcis->n;i++) nnz[i]=1; 6633 } else { 6634 const PetscInt *ii; 6635 PetscInt n; 6636 PetscBool flg_row; 6637 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6638 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6639 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6640 } 6641 for (i=n_vertices;i<total_counts_cc;i++) { 6642 if (PetscBTLookup(change_basis,i)) { 6643 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6644 if (PetscBTLookup(qr_needed_idx,i)) { 6645 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6646 } else { 6647 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6648 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6649 } 6650 } 6651 } 6652 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz)); 6653 PetscCall(MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6654 PetscCall(PetscFree(nnz)); 6655 /* Set interior change in the matrix */ 6656 if (!pcbddc->benign_change || pcbddc->fake_change) { 6657 for (i=0;i<pcis->n;i++) { 6658 PetscCall(MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES)); 6659 } 6660 } else { 6661 const PetscInt *ii,*jj; 6662 PetscScalar *aa; 6663 PetscInt n; 6664 PetscBool flg_row; 6665 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6666 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change,&aa)); 6667 for (i=0;i<n;i++) { 6668 PetscCall(MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES)); 6669 } 6670 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change,&aa)); 6671 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6672 } 6673 6674 if (pcbddc->dbg_flag) { 6675 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6676 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank)); 6677 } 6678 6679 /* Now we loop on the constraints which need a change of basis */ 6680 /* 6681 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6682 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6683 6684 Basic blocks of change of basis matrix T computed by 6685 6686 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6687 6688 | 1 0 ... 0 s_1/S | 6689 | 0 1 ... 0 s_2/S | 6690 | ... | 6691 | 0 ... 1 s_{n-1}/S | 6692 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6693 6694 with S = \sum_{i=1}^n s_i^2 6695 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6696 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6697 6698 - QR decomposition of constraints otherwise 6699 */ 6700 if (qr_needed && max_size_of_constraint) { 6701 /* space to store Q */ 6702 PetscCall(PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis)); 6703 /* array to store scaling factors for reflectors */ 6704 PetscCall(PetscMalloc1(max_constraints,&qr_tau)); 6705 /* first we issue queries for optimal work */ 6706 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6707 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6708 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6709 lqr_work = -1; 6710 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6711 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6712 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work)); 6713 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work)); 6714 lgqr_work = -1; 6715 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6716 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_N)); 6717 PetscCall(PetscBLASIntCast(max_constraints,&Blas_K)); 6718 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6719 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6720 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6721 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6722 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work)); 6723 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work)); 6724 /* array to store rhs and solution of triangular solver */ 6725 PetscCall(PetscMalloc1(max_constraints*max_constraints,&trs_rhs)); 6726 /* allocating workspace for check */ 6727 if (pcbddc->dbg_flag) { 6728 PetscCall(PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work)); 6729 } 6730 } 6731 /* array to store whether a node is primal or not */ 6732 PetscCall(PetscBTCreate(pcis->n_B,&is_primal)); 6733 PetscCall(PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B)); 6734 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B)); 6735 PetscCheck(i == total_primal_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT,total_primal_vertices,i); 6736 for (i=0;i<total_primal_vertices;i++) { 6737 PetscCall(PetscBTSet(is_primal,aux_primal_numbering_B[i])); 6738 } 6739 PetscCall(PetscFree(aux_primal_numbering_B)); 6740 6741 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6742 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6743 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6744 if (PetscBTLookup(change_basis,total_counts)) { 6745 /* get constraint info */ 6746 primal_dofs = constraints_n[total_counts]; 6747 dual_dofs = size_of_constraint-primal_dofs; 6748 6749 if (pcbddc->dbg_flag) { 6750 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n",total_counts,primal_dofs,size_of_constraint)); 6751 } 6752 6753 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6754 6755 /* copy quadrature constraints for change of basis check */ 6756 if (pcbddc->dbg_flag) { 6757 PetscCall(PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6758 } 6759 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6760 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6761 6762 /* compute QR decomposition of constraints */ 6763 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6764 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6765 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6766 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6767 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6768 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6769 PetscCall(PetscFPTrapPop()); 6770 6771 /* explicitly compute R^-T */ 6772 PetscCall(PetscArrayzero(trs_rhs,primal_dofs*primal_dofs)); 6773 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6774 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6775 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_NRHS)); 6776 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6777 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6778 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6779 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6780 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6781 PetscCall(PetscFPTrapPop()); 6782 6783 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6784 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6785 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6786 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6787 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6788 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6789 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6790 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6791 PetscCall(PetscFPTrapPop()); 6792 6793 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6794 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6795 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6796 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6797 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6798 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6799 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6800 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6801 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6802 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6803 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6804 PetscCall(PetscFPTrapPop()); 6805 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6806 6807 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6808 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6809 /* insert cols for primal dofs */ 6810 for (j=0;j<primal_dofs;j++) { 6811 start_vals = &qr_basis[j*size_of_constraint]; 6812 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6813 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6814 } 6815 /* insert cols for dual dofs */ 6816 for (j=0,k=0;j<dual_dofs;k++) { 6817 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6818 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6819 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6820 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6821 j++; 6822 } 6823 } 6824 6825 /* check change of basis */ 6826 if (pcbddc->dbg_flag) { 6827 PetscInt ii,jj; 6828 PetscBool valid_qr=PETSC_TRUE; 6829 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_M)); 6830 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6831 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_K)); 6832 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6833 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDB)); 6834 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDC)); 6835 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6836 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6837 PetscCall(PetscFPTrapPop()); 6838 for (jj=0;jj<size_of_constraint;jj++) { 6839 for (ii=0;ii<primal_dofs;ii++) { 6840 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6841 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6842 } 6843 } 6844 if (!valid_qr) { 6845 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n")); 6846 for (jj=0;jj<size_of_constraint;jj++) { 6847 for (ii=0;ii<primal_dofs;ii++) { 6848 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6849 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n",jj,ii,(double)PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6850 } 6851 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6852 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n",jj,ii,(double)PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6853 } 6854 } 6855 } 6856 } else { 6857 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n")); 6858 } 6859 } 6860 } else { /* simple transformation block */ 6861 PetscInt row,col; 6862 PetscScalar val,norm; 6863 6864 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6865 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6866 for (j=0;j<size_of_constraint;j++) { 6867 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6868 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6869 if (!PetscBTLookup(is_primal,row_B)) { 6870 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6871 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES)); 6872 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES)); 6873 } else { 6874 for (k=0;k<size_of_constraint;k++) { 6875 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6876 if (row != col) { 6877 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6878 } else { 6879 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6880 } 6881 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES)); 6882 } 6883 } 6884 } 6885 if (pcbddc->dbg_flag) { 6886 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n")); 6887 } 6888 } 6889 } else { 6890 if (pcbddc->dbg_flag) { 6891 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n",total_counts,size_of_constraint)); 6892 } 6893 } 6894 } 6895 6896 /* free workspace */ 6897 if (qr_needed) { 6898 if (pcbddc->dbg_flag) { 6899 PetscCall(PetscFree(dbg_work)); 6900 } 6901 PetscCall(PetscFree(trs_rhs)); 6902 PetscCall(PetscFree(qr_tau)); 6903 PetscCall(PetscFree(qr_work)); 6904 PetscCall(PetscFree(gqr_work)); 6905 PetscCall(PetscFree(qr_basis)); 6906 } 6907 PetscCall(PetscBTDestroy(&is_primal)); 6908 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6909 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6910 6911 /* assembling of global change of variable */ 6912 if (!pcbddc->fake_change) { 6913 Mat tmat; 6914 PetscInt bs; 6915 6916 PetscCall(VecGetSize(pcis->vec1_global,&global_size)); 6917 PetscCall(VecGetLocalSize(pcis->vec1_global,&local_size)); 6918 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat)); 6919 PetscCall(MatISSetLocalMat(tmat,localChangeOfBasisMatrix)); 6920 PetscCall(MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY)); 6921 PetscCall(MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY)); 6922 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix)); 6923 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ)); 6924 PetscCall(MatGetBlockSize(pc->pmat,&bs)); 6925 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs)); 6926 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size)); 6927 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE)); 6928 PetscCall(MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix)); 6929 PetscCall(MatDestroy(&tmat)); 6930 PetscCall(VecSet(pcis->vec1_global,0.0)); 6931 PetscCall(VecSet(pcis->vec1_N,1.0)); 6932 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6933 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6934 PetscCall(VecReciprocal(pcis->vec1_global)); 6935 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL)); 6936 6937 /* check */ 6938 if (pcbddc->dbg_flag) { 6939 PetscReal error; 6940 Vec x,x_change; 6941 6942 PetscCall(VecDuplicate(pcis->vec1_global,&x)); 6943 PetscCall(VecDuplicate(pcis->vec1_global,&x_change)); 6944 PetscCall(VecSetRandom(x,NULL)); 6945 PetscCall(VecCopy(x,pcis->vec1_global)); 6946 PetscCall(VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6947 PetscCall(VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6948 PetscCall(MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N)); 6949 PetscCall(VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6950 PetscCall(VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6951 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change)); 6952 PetscCall(VecAXPY(x,-1.0,x_change)); 6953 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 6954 if (error > PETSC_SMALL) { 6955 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",(double)error); 6956 } 6957 PetscCall(VecDestroy(&x)); 6958 PetscCall(VecDestroy(&x_change)); 6959 } 6960 /* adapt sub_schurs computed (if any) */ 6961 if (pcbddc->use_deluxe_scaling) { 6962 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6963 6964 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6965 if (sub_schurs && sub_schurs->S_Ej_all) { 6966 Mat S_new,tmat; 6967 IS is_all_N,is_V_Sall = NULL; 6968 6969 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N)); 6970 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat)); 6971 if (pcbddc->deluxe_zerorows) { 6972 ISLocalToGlobalMapping NtoSall; 6973 IS is_V; 6974 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V)); 6975 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall)); 6976 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall)); 6977 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 6978 PetscCall(ISDestroy(&is_V)); 6979 } 6980 PetscCall(ISDestroy(&is_all_N)); 6981 PetscCall(MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 6982 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 6983 PetscCall(PetscObjectReference((PetscObject)S_new)); 6984 if (pcbddc->deluxe_zerorows) { 6985 const PetscScalar *array; 6986 const PetscInt *idxs_V,*idxs_all; 6987 PetscInt i,n_V; 6988 6989 PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 6990 PetscCall(ISGetLocalSize(is_V_Sall,&n_V)); 6991 PetscCall(ISGetIndices(is_V_Sall,&idxs_V)); 6992 PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs_all)); 6993 PetscCall(VecGetArrayRead(pcis->D,&array)); 6994 for (i=0;i<n_V;i++) { 6995 PetscScalar val; 6996 PetscInt idx; 6997 6998 idx = idxs_V[i]; 6999 val = array[idxs_all[idxs_V[i]]]; 7000 PetscCall(MatSetValue(S_new,idx,idx,val,INSERT_VALUES)); 7001 } 7002 PetscCall(MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY)); 7003 PetscCall(MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY)); 7004 PetscCall(VecRestoreArrayRead(pcis->D,&array)); 7005 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all)); 7006 PetscCall(ISRestoreIndices(is_V_Sall,&idxs_V)); 7007 } 7008 sub_schurs->S_Ej_all = S_new; 7009 PetscCall(MatDestroy(&S_new)); 7010 if (sub_schurs->sum_S_Ej_all) { 7011 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 7012 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7013 PetscCall(PetscObjectReference((PetscObject)S_new)); 7014 if (pcbddc->deluxe_zerorows) { 7015 PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 7016 } 7017 sub_schurs->sum_S_Ej_all = S_new; 7018 PetscCall(MatDestroy(&S_new)); 7019 } 7020 PetscCall(ISDestroy(&is_V_Sall)); 7021 PetscCall(MatDestroy(&tmat)); 7022 } 7023 /* destroy any change of basis context in sub_schurs */ 7024 if (sub_schurs && sub_schurs->change) { 7025 PetscInt i; 7026 7027 for (i=0;i<sub_schurs->n_subs;i++) { 7028 PetscCall(KSPDestroy(&sub_schurs->change[i])); 7029 } 7030 PetscCall(PetscFree(sub_schurs->change)); 7031 } 7032 } 7033 if (pcbddc->switch_static) { /* need to save the local change */ 7034 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7035 } else { 7036 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 7037 } 7038 /* determine if any process has changed the pressures locally */ 7039 pcbddc->change_interior = pcbddc->benign_have_null; 7040 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7041 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7042 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7043 pcbddc->use_qr_single = qr_needed; 7044 } 7045 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7046 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7047 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7048 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7049 } else { 7050 Mat benign_global = NULL; 7051 if (pcbddc->benign_have_null) { 7052 Mat M; 7053 7054 pcbddc->change_interior = PETSC_TRUE; 7055 PetscCall(VecCopy(matis->counter,pcis->vec1_N)); 7056 PetscCall(VecReciprocal(pcis->vec1_N)); 7057 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global)); 7058 if (pcbddc->benign_change) { 7059 PetscCall(MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M)); 7060 PetscCall(MatDiagonalScale(M,pcis->vec1_N,NULL)); 7061 } else { 7062 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M)); 7063 PetscCall(MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES)); 7064 } 7065 PetscCall(MatISSetLocalMat(benign_global,M)); 7066 PetscCall(MatDestroy(&M)); 7067 PetscCall(MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY)); 7068 PetscCall(MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY)); 7069 } 7070 if (pcbddc->user_ChangeOfBasisMatrix) { 7071 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix)); 7072 PetscCall(MatDestroy(&benign_global)); 7073 } else if (pcbddc->benign_have_null) { 7074 pcbddc->ChangeOfBasisMatrix = benign_global; 7075 } 7076 } 7077 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7078 IS is_global; 7079 const PetscInt *gidxs; 7080 7081 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping,&gidxs)); 7082 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global)); 7083 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping,&gidxs)); 7084 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change)); 7085 PetscCall(ISDestroy(&is_global)); 7086 } 7087 } 7088 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7089 PetscCall(VecDuplicate(pcis->vec1_global,&pcbddc->work_change)); 7090 } 7091 7092 if (!pcbddc->fake_change) { 7093 /* add pressure dofs to set of primal nodes for numbering purposes */ 7094 for (i=0;i<pcbddc->benign_n;i++) { 7095 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7096 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7097 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7098 pcbddc->local_primal_size_cc++; 7099 pcbddc->local_primal_size++; 7100 } 7101 7102 /* check if a new primal space has been introduced (also take into account benign trick) */ 7103 pcbddc->new_primal_space_local = PETSC_TRUE; 7104 if (olocal_primal_size == pcbddc->local_primal_size) { 7105 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7106 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7107 if (!pcbddc->new_primal_space_local) { 7108 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7109 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7110 } 7111 } 7112 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7113 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 7114 } 7115 PetscCall(PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult)); 7116 7117 /* flush dbg viewer */ 7118 if (pcbddc->dbg_flag) { 7119 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7120 } 7121 7122 /* free workspace */ 7123 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7124 PetscCall(PetscBTDestroy(&change_basis)); 7125 if (!pcbddc->adaptive_selection) { 7126 PetscCall(PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n)); 7127 PetscCall(PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B)); 7128 } else { 7129 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n,pcbddc->adaptive_constraints_idxs_ptr,pcbddc->adaptive_constraints_data_ptr,pcbddc->adaptive_constraints_idxs,pcbddc->adaptive_constraints_data)); 7130 PetscCall(PetscFree(constraints_n)); 7131 PetscCall(PetscFree(constraints_idxs_B)); 7132 } 7133 PetscFunctionReturn(0); 7134 } 7135 7136 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7137 { 7138 ISLocalToGlobalMapping map; 7139 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7140 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7141 PetscInt i,N; 7142 PetscBool rcsr = PETSC_FALSE; 7143 7144 PetscFunctionBegin; 7145 if (pcbddc->recompute_topography) { 7146 pcbddc->graphanalyzed = PETSC_FALSE; 7147 /* Reset previously computed graph */ 7148 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7149 /* Init local Graph struct */ 7150 PetscCall(MatGetSize(pc->pmat,&N,NULL)); 7151 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&map,NULL)); 7152 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount)); 7153 7154 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7155 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local)); 7156 } 7157 /* Check validity of the csr graph passed in by the user */ 7158 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7159 7160 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7161 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7162 PetscInt *xadj,*adjncy; 7163 PetscInt nvtxs; 7164 PetscBool flg_row=PETSC_FALSE; 7165 7166 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7167 if (flg_row) { 7168 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES)); 7169 pcbddc->computed_rowadj = PETSC_TRUE; 7170 } 7171 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7172 rcsr = PETSC_TRUE; 7173 } 7174 if (pcbddc->dbg_flag) { 7175 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7176 } 7177 7178 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7179 PetscReal *lcoords; 7180 PetscInt n; 7181 MPI_Datatype dimrealtype; 7182 7183 /* TODO: support for blocked */ 7184 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7185 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7186 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords)); 7187 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype)); 7188 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7189 PetscCall(PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7190 PetscCall(PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7191 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7192 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7193 7194 pcbddc->mat_graph->coords = lcoords; 7195 pcbddc->mat_graph->cloc = PETSC_TRUE; 7196 pcbddc->mat_graph->cnloc = n; 7197 } 7198 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7199 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7200 7201 /* Setup of Graph */ 7202 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7203 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local)); 7204 7205 /* attach info on disconnected subdomains if present */ 7206 if (pcbddc->n_local_subs) { 7207 PetscInt *local_subs,n,totn; 7208 7209 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7210 PetscCall(PetscMalloc1(n,&local_subs)); 7211 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7212 for (i=0;i<pcbddc->n_local_subs;i++) { 7213 const PetscInt *idxs; 7214 PetscInt nl,j; 7215 7216 PetscCall(ISGetLocalSize(pcbddc->local_subs[i],&nl)); 7217 PetscCall(ISGetIndices(pcbddc->local_subs[i],&idxs)); 7218 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7219 PetscCall(ISRestoreIndices(pcbddc->local_subs[i],&idxs)); 7220 } 7221 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7222 pcbddc->mat_graph->n_local_subs = totn + 1; 7223 pcbddc->mat_graph->local_subs = local_subs; 7224 } 7225 } 7226 7227 if (!pcbddc->graphanalyzed) { 7228 /* Graph's connected components analysis */ 7229 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7230 pcbddc->graphanalyzed = PETSC_TRUE; 7231 pcbddc->corner_selected = pcbddc->corner_selection; 7232 } 7233 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7234 PetscFunctionReturn(0); 7235 } 7236 7237 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7238 { 7239 PetscInt i,j,n; 7240 PetscScalar *alphas; 7241 PetscReal norm,*onorms; 7242 7243 PetscFunctionBegin; 7244 n = *nio; 7245 if (!n) PetscFunctionReturn(0); 7246 PetscCall(PetscMalloc2(n,&alphas,n,&onorms)); 7247 PetscCall(VecNormalize(vecs[0],&norm)); 7248 if (norm < PETSC_SMALL) { 7249 onorms[0] = 0.0; 7250 PetscCall(VecSet(vecs[0],0.0)); 7251 } else { 7252 onorms[0] = norm; 7253 } 7254 7255 for (i=1;i<n;i++) { 7256 PetscCall(VecMDot(vecs[i],i,vecs,alphas)); 7257 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7258 PetscCall(VecMAXPY(vecs[i],i,alphas,vecs)); 7259 PetscCall(VecNormalize(vecs[i],&norm)); 7260 if (norm < PETSC_SMALL) { 7261 onorms[i] = 0.0; 7262 PetscCall(VecSet(vecs[i],0.0)); 7263 } else { 7264 onorms[i] = norm; 7265 } 7266 } 7267 /* push nonzero vectors at the beginning */ 7268 for (i=0;i<n;i++) { 7269 if (onorms[i] == 0.0) { 7270 for (j=i+1;j<n;j++) { 7271 if (onorms[j] != 0.0) { 7272 PetscCall(VecCopy(vecs[j],vecs[i])); 7273 onorms[j] = 0.0; 7274 } 7275 } 7276 } 7277 } 7278 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7279 PetscCall(PetscFree2(alphas,onorms)); 7280 PetscFunctionReturn(0); 7281 } 7282 7283 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7284 { 7285 ISLocalToGlobalMapping mapping; 7286 Mat A; 7287 PetscInt n_neighs,*neighs,*n_shared,**shared; 7288 PetscMPIInt size,rank,color; 7289 PetscInt *xadj,*adjncy; 7290 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7291 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7292 PetscInt void_procs,*procs_candidates = NULL; 7293 PetscInt xadj_count,*count; 7294 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7295 PetscSubcomm psubcomm; 7296 MPI_Comm subcomm; 7297 7298 PetscFunctionBegin; 7299 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7300 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7301 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7302 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7303 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7304 PetscCheck(*n_subdomains >0,PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %" PetscInt_FMT,*n_subdomains); 7305 7306 if (have_void) *have_void = PETSC_FALSE; 7307 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size)); 7308 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 7309 PetscCall(MatISGetLocalMat(mat,&A)); 7310 PetscCall(MatGetLocalSize(A,&n,NULL)); 7311 im_active = !!n; 7312 PetscCall(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat))); 7313 void_procs = size - active_procs; 7314 /* get ranks of of non-active processes in mat communicator */ 7315 if (void_procs) { 7316 PetscInt ncand; 7317 7318 if (have_void) *have_void = PETSC_TRUE; 7319 PetscCall(PetscMalloc1(size,&procs_candidates)); 7320 PetscCallMPI(MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 7321 for (i=0,ncand=0;i<size;i++) { 7322 if (!procs_candidates[i]) { 7323 procs_candidates[ncand++] = i; 7324 } 7325 } 7326 /* force n_subdomains to be not greater that the number of non-active processes */ 7327 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7328 } 7329 7330 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7331 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7332 PetscCall(MatGetSize(mat,&N,NULL)); 7333 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7334 PetscInt issize,isidx,dest; 7335 if (*n_subdomains == 1) dest = 0; 7336 else dest = rank; 7337 if (im_active) { 7338 issize = 1; 7339 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7340 isidx = procs_candidates[dest]; 7341 } else { 7342 isidx = dest; 7343 } 7344 } else { 7345 issize = 0; 7346 isidx = -1; 7347 } 7348 if (*n_subdomains != 1) *n_subdomains = active_procs; 7349 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends)); 7350 PetscCall(PetscFree(procs_candidates)); 7351 PetscFunctionReturn(0); 7352 } 7353 PetscCall(PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL)); 7354 PetscCall(PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL)); 7355 threshold = PetscMax(threshold,2); 7356 7357 /* Get info on mapping */ 7358 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7359 PetscCall(ISLocalToGlobalMappingGetInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7360 7361 /* build local CSR graph of subdomains' connectivity */ 7362 PetscCall(PetscMalloc1(2,&xadj)); 7363 xadj[0] = 0; 7364 xadj[1] = PetscMax(n_neighs-1,0); 7365 PetscCall(PetscMalloc1(xadj[1],&adjncy)); 7366 PetscCall(PetscMalloc1(xadj[1],&adjncy_wgt)); 7367 PetscCall(PetscCalloc1(n,&count)); 7368 for (i=1;i<n_neighs;i++) 7369 for (j=0;j<n_shared[i];j++) 7370 count[shared[i][j]] += 1; 7371 7372 xadj_count = 0; 7373 for (i=1;i<n_neighs;i++) { 7374 for (j=0;j<n_shared[i];j++) { 7375 if (count[shared[i][j]] < threshold) { 7376 adjncy[xadj_count] = neighs[i]; 7377 adjncy_wgt[xadj_count] = n_shared[i]; 7378 xadj_count++; 7379 break; 7380 } 7381 } 7382 } 7383 xadj[1] = xadj_count; 7384 PetscCall(PetscFree(count)); 7385 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7386 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7387 7388 PetscCall(PetscMalloc1(1,&ranks_send_to_idx)); 7389 7390 /* Restrict work on active processes only */ 7391 PetscCall(PetscMPIIntCast(im_active,&color)); 7392 if (void_procs) { 7393 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm)); 7394 PetscCall(PetscSubcommSetNumber(psubcomm,2)); /* 2 groups, active process and not active processes */ 7395 PetscCall(PetscSubcommSetTypeGeneral(psubcomm,color,rank)); 7396 subcomm = PetscSubcommChild(psubcomm); 7397 } else { 7398 psubcomm = NULL; 7399 subcomm = PetscObjectComm((PetscObject)mat); 7400 } 7401 7402 v_wgt = NULL; 7403 if (!color) { 7404 PetscCall(PetscFree(xadj)); 7405 PetscCall(PetscFree(adjncy)); 7406 PetscCall(PetscFree(adjncy_wgt)); 7407 } else { 7408 Mat subdomain_adj; 7409 IS new_ranks,new_ranks_contig; 7410 MatPartitioning partitioner; 7411 PetscInt rstart=0,rend=0; 7412 PetscInt *is_indices,*oldranks; 7413 PetscMPIInt size; 7414 PetscBool aggregate; 7415 7416 PetscCallMPI(MPI_Comm_size(subcomm,&size)); 7417 if (void_procs) { 7418 PetscInt prank = rank; 7419 PetscCall(PetscMalloc1(size,&oldranks)); 7420 PetscCallMPI(MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm)); 7421 for (i=0;i<xadj[1];i++) { 7422 PetscCall(PetscFindInt(adjncy[i],size,oldranks,&adjncy[i])); 7423 } 7424 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7425 } else { 7426 oldranks = NULL; 7427 } 7428 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7429 if (aggregate) { /* TODO: all this part could be made more efficient */ 7430 PetscInt lrows,row,ncols,*cols; 7431 PetscMPIInt nrank; 7432 PetscScalar *vals; 7433 7434 PetscCallMPI(MPI_Comm_rank(subcomm,&nrank)); 7435 lrows = 0; 7436 if (nrank<redprocs) { 7437 lrows = size/redprocs; 7438 if (nrank<size%redprocs) lrows++; 7439 } 7440 PetscCall(MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj)); 7441 PetscCall(MatGetOwnershipRange(subdomain_adj,&rstart,&rend)); 7442 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 7443 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 7444 row = nrank; 7445 ncols = xadj[1]-xadj[0]; 7446 cols = adjncy; 7447 PetscCall(PetscMalloc1(ncols,&vals)); 7448 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7449 PetscCall(MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES)); 7450 PetscCall(MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7451 PetscCall(MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7452 PetscCall(PetscFree(xadj)); 7453 PetscCall(PetscFree(adjncy)); 7454 PetscCall(PetscFree(adjncy_wgt)); 7455 PetscCall(PetscFree(vals)); 7456 if (use_vwgt) { 7457 Vec v; 7458 const PetscScalar *array; 7459 PetscInt nl; 7460 7461 PetscCall(MatCreateVecs(subdomain_adj,&v,NULL)); 7462 PetscCall(VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES)); 7463 PetscCall(VecAssemblyBegin(v)); 7464 PetscCall(VecAssemblyEnd(v)); 7465 PetscCall(VecGetLocalSize(v,&nl)); 7466 PetscCall(VecGetArrayRead(v,&array)); 7467 PetscCall(PetscMalloc1(nl,&v_wgt)); 7468 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7469 PetscCall(VecRestoreArrayRead(v,&array)); 7470 PetscCall(VecDestroy(&v)); 7471 } 7472 } else { 7473 PetscCall(MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj)); 7474 if (use_vwgt) { 7475 PetscCall(PetscMalloc1(1,&v_wgt)); 7476 v_wgt[0] = n; 7477 } 7478 } 7479 /* PetscCall(MatView(subdomain_adj,0)); */ 7480 7481 /* Partition */ 7482 PetscCall(MatPartitioningCreate(subcomm,&partitioner)); 7483 #if defined(PETSC_HAVE_PTSCOTCH) 7484 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH)); 7485 #elif defined(PETSC_HAVE_PARMETIS) 7486 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS)); 7487 #else 7488 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE)); 7489 #endif 7490 PetscCall(MatPartitioningSetAdjacency(partitioner,subdomain_adj)); 7491 if (v_wgt) { 7492 PetscCall(MatPartitioningSetVertexWeights(partitioner,v_wgt)); 7493 } 7494 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7495 PetscCall(MatPartitioningSetNParts(partitioner,*n_subdomains)); 7496 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7497 PetscCall(MatPartitioningApply(partitioner,&new_ranks)); 7498 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7499 7500 /* renumber new_ranks to avoid "holes" in new set of processors */ 7501 PetscCall(ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig)); 7502 PetscCall(ISDestroy(&new_ranks)); 7503 PetscCall(ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7504 if (!aggregate) { 7505 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7506 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7507 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7508 } else if (oldranks) { 7509 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7510 } else { 7511 ranks_send_to_idx[0] = is_indices[0]; 7512 } 7513 } else { 7514 PetscInt idx = 0; 7515 PetscMPIInt tag; 7516 MPI_Request *reqs; 7517 7518 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag)); 7519 PetscCall(PetscMalloc1(rend-rstart,&reqs)); 7520 for (i=rstart;i<rend;i++) { 7521 PetscCallMPI(MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart])); 7522 } 7523 PetscCallMPI(MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE)); 7524 PetscCallMPI(MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE)); 7525 PetscCall(PetscFree(reqs)); 7526 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7527 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7528 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7529 } else if (oldranks) { 7530 ranks_send_to_idx[0] = oldranks[idx]; 7531 } else { 7532 ranks_send_to_idx[0] = idx; 7533 } 7534 } 7535 PetscCall(ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7536 /* clean up */ 7537 PetscCall(PetscFree(oldranks)); 7538 PetscCall(ISDestroy(&new_ranks_contig)); 7539 PetscCall(MatDestroy(&subdomain_adj)); 7540 PetscCall(MatPartitioningDestroy(&partitioner)); 7541 } 7542 PetscCall(PetscSubcommDestroy(&psubcomm)); 7543 PetscCall(PetscFree(procs_candidates)); 7544 7545 /* assemble parallel IS for sends */ 7546 i = 1; 7547 if (!color) i=0; 7548 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends)); 7549 PetscFunctionReturn(0); 7550 } 7551 7552 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7553 7554 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7555 { 7556 Mat local_mat; 7557 IS is_sends_internal; 7558 PetscInt rows,cols,new_local_rows; 7559 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7560 PetscBool ismatis,isdense,newisdense,destroy_mat; 7561 ISLocalToGlobalMapping l2gmap; 7562 PetscInt* l2gmap_indices; 7563 const PetscInt* is_indices; 7564 MatType new_local_type; 7565 /* buffers */ 7566 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7567 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7568 PetscInt *recv_buffer_idxs_local; 7569 PetscScalar *ptr_vals,*recv_buffer_vals; 7570 const PetscScalar *send_buffer_vals; 7571 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7572 /* MPI */ 7573 MPI_Comm comm,comm_n; 7574 PetscSubcomm subcomm; 7575 PetscMPIInt n_sends,n_recvs,size; 7576 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7577 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7578 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7579 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7580 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7581 7582 PetscFunctionBegin; 7583 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7584 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7585 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7586 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7587 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7588 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7589 PetscValidLogicalCollectiveBool(mat,reuse,6); 7590 PetscValidLogicalCollectiveInt(mat,nis,8); 7591 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7592 if (nvecs) { 7593 PetscCheck(nvecs <= 1,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7594 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7595 } 7596 /* further checks */ 7597 PetscCall(MatISGetLocalMat(mat,&local_mat)); 7598 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense)); 7599 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7600 PetscCall(MatGetSize(local_mat,&rows,&cols)); 7601 PetscCheck(rows == cols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7602 if (reuse && *mat_n) { 7603 PetscInt mrows,mcols,mnrows,mncols; 7604 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7605 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis)); 7606 PetscCheck(ismatis,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7607 PetscCall(MatGetSize(mat,&mrows,&mcols)); 7608 PetscCall(MatGetSize(*mat_n,&mnrows,&mncols)); 7609 PetscCheck(mrows == mnrows,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT,mrows,mnrows); 7610 PetscCheck(mcols == mncols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT,mcols,mncols); 7611 } 7612 PetscCall(MatGetBlockSize(local_mat,&bs)); 7613 PetscValidLogicalCollectiveInt(mat,bs,1); 7614 7615 /* prepare IS for sending if not provided */ 7616 if (!is_sends) { 7617 PetscCheck(n_subdomains,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7618 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL)); 7619 } else { 7620 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7621 is_sends_internal = is_sends; 7622 } 7623 7624 /* get comm */ 7625 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 7626 7627 /* compute number of sends */ 7628 PetscCall(ISGetLocalSize(is_sends_internal,&i)); 7629 PetscCall(PetscMPIIntCast(i,&n_sends)); 7630 7631 /* compute number of receives */ 7632 PetscCallMPI(MPI_Comm_size(comm,&size)); 7633 PetscCall(PetscMalloc1(size,&iflags)); 7634 PetscCall(PetscArrayzero(iflags,size)); 7635 PetscCall(ISGetIndices(is_sends_internal,&is_indices)); 7636 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7637 PetscCall(PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs)); 7638 PetscCall(PetscFree(iflags)); 7639 7640 /* restrict comm if requested */ 7641 subcomm = NULL; 7642 destroy_mat = PETSC_FALSE; 7643 if (restrict_comm) { 7644 PetscMPIInt color,subcommsize; 7645 7646 color = 0; 7647 if (restrict_full) { 7648 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7649 } else { 7650 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7651 } 7652 PetscCall(MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm)); 7653 subcommsize = size - subcommsize; 7654 /* check if reuse has been requested */ 7655 if (reuse) { 7656 if (*mat_n) { 7657 PetscMPIInt subcommsize2; 7658 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2)); 7659 PetscCheck(subcommsize == subcommsize2,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7660 comm_n = PetscObjectComm((PetscObject)*mat_n); 7661 } else { 7662 comm_n = PETSC_COMM_SELF; 7663 } 7664 } else { /* MAT_INITIAL_MATRIX */ 7665 PetscMPIInt rank; 7666 7667 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 7668 PetscCall(PetscSubcommCreate(comm,&subcomm)); 7669 PetscCall(PetscSubcommSetNumber(subcomm,2)); 7670 PetscCall(PetscSubcommSetTypeGeneral(subcomm,color,rank)); 7671 comm_n = PetscSubcommChild(subcomm); 7672 } 7673 /* flag to destroy *mat_n if not significative */ 7674 if (color) destroy_mat = PETSC_TRUE; 7675 } else { 7676 comm_n = comm; 7677 } 7678 7679 /* prepare send/receive buffers */ 7680 PetscCall(PetscMalloc1(size,&ilengths_idxs)); 7681 PetscCall(PetscArrayzero(ilengths_idxs,size)); 7682 PetscCall(PetscMalloc1(size,&ilengths_vals)); 7683 PetscCall(PetscArrayzero(ilengths_vals,size)); 7684 if (nis) { 7685 PetscCall(PetscCalloc1(size,&ilengths_idxs_is)); 7686 } 7687 7688 /* Get data from local matrices */ 7689 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7690 /* TODO: See below some guidelines on how to prepare the local buffers */ 7691 /* 7692 send_buffer_vals should contain the raw values of the local matrix 7693 send_buffer_idxs should contain: 7694 - MatType_PRIVATE type 7695 - PetscInt size_of_l2gmap 7696 - PetscInt global_row_indices[size_of_l2gmap] 7697 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7698 */ 7699 { 7700 ISLocalToGlobalMapping mapping; 7701 7702 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7703 PetscCall(MatDenseGetArrayRead(local_mat,&send_buffer_vals)); 7704 PetscCall(ISLocalToGlobalMappingGetSize(mapping,&i)); 7705 PetscCall(PetscMalloc1(i+2,&send_buffer_idxs)); 7706 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7707 send_buffer_idxs[1] = i; 7708 PetscCall(ISLocalToGlobalMappingGetIndices(mapping,(const PetscInt**)&ptr_idxs)); 7709 PetscCall(PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i)); 7710 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping,(const PetscInt**)&ptr_idxs)); 7711 PetscCall(PetscMPIIntCast(i,&len)); 7712 for (i=0;i<n_sends;i++) { 7713 ilengths_vals[is_indices[i]] = len*len; 7714 ilengths_idxs[is_indices[i]] = len+2; 7715 } 7716 } 7717 PetscCall(PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals)); 7718 /* additional is (if any) */ 7719 if (nis) { 7720 PetscMPIInt psum; 7721 PetscInt j; 7722 for (j=0,psum=0;j<nis;j++) { 7723 PetscInt plen; 7724 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7725 PetscCall(PetscMPIIntCast(plen,&len)); 7726 psum += len+1; /* indices + length */ 7727 } 7728 PetscCall(PetscMalloc1(psum,&send_buffer_idxs_is)); 7729 for (j=0,psum=0;j<nis;j++) { 7730 PetscInt plen; 7731 const PetscInt *is_array_idxs; 7732 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7733 send_buffer_idxs_is[psum] = plen; 7734 PetscCall(ISGetIndices(isarray[j],&is_array_idxs)); 7735 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen)); 7736 PetscCall(ISRestoreIndices(isarray[j],&is_array_idxs)); 7737 psum += plen+1; /* indices + length */ 7738 } 7739 for (i=0;i<n_sends;i++) { 7740 ilengths_idxs_is[is_indices[i]] = psum; 7741 } 7742 PetscCall(PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is)); 7743 } 7744 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 7745 7746 buf_size_idxs = 0; 7747 buf_size_vals = 0; 7748 buf_size_idxs_is = 0; 7749 buf_size_vecs = 0; 7750 for (i=0;i<n_recvs;i++) { 7751 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7752 buf_size_vals += (PetscInt)olengths_vals[i]; 7753 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7754 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7755 } 7756 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs)); 7757 PetscCall(PetscMalloc1(buf_size_vals,&recv_buffer_vals)); 7758 PetscCall(PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is)); 7759 PetscCall(PetscMalloc1(buf_size_vecs,&recv_buffer_vecs)); 7760 7761 /* get new tags for clean communications */ 7762 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs)); 7763 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vals)); 7764 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is)); 7765 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vecs)); 7766 7767 /* allocate for requests */ 7768 PetscCall(PetscMalloc1(n_sends,&send_req_idxs)); 7769 PetscCall(PetscMalloc1(n_sends,&send_req_vals)); 7770 PetscCall(PetscMalloc1(n_sends,&send_req_idxs_is)); 7771 PetscCall(PetscMalloc1(n_sends,&send_req_vecs)); 7772 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs)); 7773 PetscCall(PetscMalloc1(n_recvs,&recv_req_vals)); 7774 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs_is)); 7775 PetscCall(PetscMalloc1(n_recvs,&recv_req_vecs)); 7776 7777 /* communications */ 7778 ptr_idxs = recv_buffer_idxs; 7779 ptr_vals = recv_buffer_vals; 7780 ptr_idxs_is = recv_buffer_idxs_is; 7781 ptr_vecs = recv_buffer_vecs; 7782 for (i=0;i<n_recvs;i++) { 7783 source_dest = onodes[i]; 7784 PetscCallMPI(MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i])); 7785 PetscCallMPI(MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i])); 7786 ptr_idxs += olengths_idxs[i]; 7787 ptr_vals += olengths_vals[i]; 7788 if (nis) { 7789 source_dest = onodes_is[i]; 7790 PetscCallMPI(MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i])); 7791 ptr_idxs_is += olengths_idxs_is[i]; 7792 } 7793 if (nvecs) { 7794 source_dest = onodes[i]; 7795 PetscCallMPI(MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i])); 7796 ptr_vecs += olengths_idxs[i]-2; 7797 } 7798 } 7799 for (i=0;i<n_sends;i++) { 7800 PetscCall(PetscMPIIntCast(is_indices[i],&source_dest)); 7801 PetscCallMPI(MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i])); 7802 PetscCallMPI(MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i])); 7803 if (nis) { 7804 PetscCallMPI(MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i])); 7805 } 7806 if (nvecs) { 7807 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 7808 PetscCallMPI(MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i])); 7809 } 7810 } 7811 PetscCall(ISRestoreIndices(is_sends_internal,&is_indices)); 7812 PetscCall(ISDestroy(&is_sends_internal)); 7813 7814 /* assemble new l2g map */ 7815 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE)); 7816 ptr_idxs = recv_buffer_idxs; 7817 new_local_rows = 0; 7818 for (i=0;i<n_recvs;i++) { 7819 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7820 ptr_idxs += olengths_idxs[i]; 7821 } 7822 PetscCall(PetscMalloc1(new_local_rows,&l2gmap_indices)); 7823 ptr_idxs = recv_buffer_idxs; 7824 new_local_rows = 0; 7825 for (i=0;i<n_recvs;i++) { 7826 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1))); 7827 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7828 ptr_idxs += olengths_idxs[i]; 7829 } 7830 PetscCall(PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices)); 7831 PetscCall(ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap)); 7832 PetscCall(PetscFree(l2gmap_indices)); 7833 7834 /* infer new local matrix type from received local matrices type */ 7835 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7836 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7837 if (n_recvs) { 7838 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7839 ptr_idxs = recv_buffer_idxs; 7840 for (i=0;i<n_recvs;i++) { 7841 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7842 new_local_type_private = MATAIJ_PRIVATE; 7843 break; 7844 } 7845 ptr_idxs += olengths_idxs[i]; 7846 } 7847 switch (new_local_type_private) { 7848 case MATDENSE_PRIVATE: 7849 new_local_type = MATSEQAIJ; 7850 bs = 1; 7851 break; 7852 case MATAIJ_PRIVATE: 7853 new_local_type = MATSEQAIJ; 7854 bs = 1; 7855 break; 7856 case MATBAIJ_PRIVATE: 7857 new_local_type = MATSEQBAIJ; 7858 break; 7859 case MATSBAIJ_PRIVATE: 7860 new_local_type = MATSEQSBAIJ; 7861 break; 7862 default: 7863 SETERRQ(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7864 } 7865 } else { /* by default, new_local_type is seqaij */ 7866 new_local_type = MATSEQAIJ; 7867 bs = 1; 7868 } 7869 7870 /* create MATIS object if needed */ 7871 if (!reuse) { 7872 PetscCall(MatGetSize(mat,&rows,&cols)); 7873 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7874 } else { 7875 /* it also destroys the local matrices */ 7876 if (*mat_n) { 7877 PetscCall(MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap)); 7878 } else { /* this is a fake object */ 7879 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7880 } 7881 } 7882 PetscCall(MatISGetLocalMat(*mat_n,&local_mat)); 7883 PetscCall(MatSetType(local_mat,new_local_type)); 7884 7885 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE)); 7886 7887 /* Global to local map of received indices */ 7888 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local)); /* needed for values insertion */ 7889 PetscCall(ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local)); 7890 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 7891 7892 /* restore attributes -> type of incoming data and its size */ 7893 buf_size_idxs = 0; 7894 for (i=0;i<n_recvs;i++) { 7895 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7896 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7897 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7898 } 7899 PetscCall(PetscFree(recv_buffer_idxs)); 7900 7901 /* set preallocation */ 7902 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense)); 7903 if (!newisdense) { 7904 PetscInt *new_local_nnz=NULL; 7905 7906 ptr_idxs = recv_buffer_idxs_local; 7907 if (n_recvs) { 7908 PetscCall(PetscCalloc1(new_local_rows,&new_local_nnz)); 7909 } 7910 for (i=0;i<n_recvs;i++) { 7911 PetscInt j; 7912 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7913 for (j=0;j<*(ptr_idxs+1);j++) { 7914 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7915 } 7916 } else { 7917 /* TODO */ 7918 } 7919 ptr_idxs += olengths_idxs[i]; 7920 } 7921 if (new_local_nnz) { 7922 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7923 PetscCall(MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz)); 7924 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7925 PetscCall(MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7926 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7927 PetscCall(MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7928 } else { 7929 PetscCall(MatSetUp(local_mat)); 7930 } 7931 PetscCall(PetscFree(new_local_nnz)); 7932 } else { 7933 PetscCall(MatSetUp(local_mat)); 7934 } 7935 7936 /* set values */ 7937 ptr_vals = recv_buffer_vals; 7938 ptr_idxs = recv_buffer_idxs_local; 7939 for (i=0;i<n_recvs;i++) { 7940 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7941 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE)); 7942 PetscCall(MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES)); 7943 PetscCall(MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY)); 7944 PetscCall(MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY)); 7945 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE)); 7946 } else { 7947 /* TODO */ 7948 } 7949 ptr_idxs += olengths_idxs[i]; 7950 ptr_vals += olengths_vals[i]; 7951 } 7952 PetscCall(MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY)); 7953 PetscCall(MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY)); 7954 PetscCall(MatISRestoreLocalMat(*mat_n,&local_mat)); 7955 PetscCall(MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY)); 7956 PetscCall(MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY)); 7957 PetscCall(PetscFree(recv_buffer_vals)); 7958 7959 #if 0 7960 if (!restrict_comm) { /* check */ 7961 Vec lvec,rvec; 7962 PetscReal infty_error; 7963 7964 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 7965 PetscCall(VecSetRandom(rvec,NULL)); 7966 PetscCall(MatMult(mat,rvec,lvec)); 7967 PetscCall(VecScale(lvec,-1.0)); 7968 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7969 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7970 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7971 PetscCall(VecDestroy(&rvec)); 7972 PetscCall(VecDestroy(&lvec)); 7973 } 7974 #endif 7975 7976 /* assemble new additional is (if any) */ 7977 if (nis) { 7978 PetscInt **temp_idxs,*count_is,j,psum; 7979 7980 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE)); 7981 PetscCall(PetscCalloc1(nis,&count_is)); 7982 ptr_idxs = recv_buffer_idxs_is; 7983 psum = 0; 7984 for (i=0;i<n_recvs;i++) { 7985 for (j=0;j<nis;j++) { 7986 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7987 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7988 psum += plen; 7989 ptr_idxs += plen+1; /* shift pointer to received data */ 7990 } 7991 } 7992 PetscCall(PetscMalloc1(nis,&temp_idxs)); 7993 PetscCall(PetscMalloc1(psum,&temp_idxs[0])); 7994 for (i=1;i<nis;i++) { 7995 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7996 } 7997 PetscCall(PetscArrayzero(count_is,nis)); 7998 ptr_idxs = recv_buffer_idxs_is; 7999 for (i=0;i<n_recvs;i++) { 8000 for (j=0;j<nis;j++) { 8001 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8002 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen)); 8003 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8004 ptr_idxs += plen+1; /* shift pointer to received data */ 8005 } 8006 } 8007 for (i=0;i<nis;i++) { 8008 PetscCall(ISDestroy(&isarray[i])); 8009 PetscCall(PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i])); 8010 PetscCall(ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i])); 8011 } 8012 PetscCall(PetscFree(count_is)); 8013 PetscCall(PetscFree(temp_idxs[0])); 8014 PetscCall(PetscFree(temp_idxs)); 8015 } 8016 /* free workspace */ 8017 PetscCall(PetscFree(recv_buffer_idxs_is)); 8018 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE)); 8019 PetscCall(PetscFree(send_buffer_idxs)); 8020 PetscCallMPI(MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE)); 8021 if (isdense) { 8022 PetscCall(MatISGetLocalMat(mat,&local_mat)); 8023 PetscCall(MatDenseRestoreArrayRead(local_mat,&send_buffer_vals)); 8024 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 8025 } else { 8026 /* PetscCall(PetscFree(send_buffer_vals)); */ 8027 } 8028 if (nis) { 8029 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE)); 8030 PetscCall(PetscFree(send_buffer_idxs_is)); 8031 } 8032 8033 if (nvecs) { 8034 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE)); 8035 PetscCallMPI(MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE)); 8036 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 8037 PetscCall(VecDestroy(&nnsp_vec[0])); 8038 PetscCall(VecCreate(comm_n,&nnsp_vec[0])); 8039 PetscCall(VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE)); 8040 PetscCall(VecSetType(nnsp_vec[0],VECSTANDARD)); 8041 /* set values */ 8042 ptr_vals = recv_buffer_vecs; 8043 ptr_idxs = recv_buffer_idxs_local; 8044 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 8045 for (i=0;i<n_recvs;i++) { 8046 PetscInt j; 8047 for (j=0;j<*(ptr_idxs+1);j++) { 8048 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8049 } 8050 ptr_idxs += olengths_idxs[i]; 8051 ptr_vals += olengths_idxs[i]-2; 8052 } 8053 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 8054 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8055 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8056 } 8057 8058 PetscCall(PetscFree(recv_buffer_vecs)); 8059 PetscCall(PetscFree(recv_buffer_idxs_local)); 8060 PetscCall(PetscFree(recv_req_idxs)); 8061 PetscCall(PetscFree(recv_req_vals)); 8062 PetscCall(PetscFree(recv_req_vecs)); 8063 PetscCall(PetscFree(recv_req_idxs_is)); 8064 PetscCall(PetscFree(send_req_idxs)); 8065 PetscCall(PetscFree(send_req_vals)); 8066 PetscCall(PetscFree(send_req_vecs)); 8067 PetscCall(PetscFree(send_req_idxs_is)); 8068 PetscCall(PetscFree(ilengths_vals)); 8069 PetscCall(PetscFree(ilengths_idxs)); 8070 PetscCall(PetscFree(olengths_vals)); 8071 PetscCall(PetscFree(olengths_idxs)); 8072 PetscCall(PetscFree(onodes)); 8073 if (nis) { 8074 PetscCall(PetscFree(ilengths_idxs_is)); 8075 PetscCall(PetscFree(olengths_idxs_is)); 8076 PetscCall(PetscFree(onodes_is)); 8077 } 8078 PetscCall(PetscSubcommDestroy(&subcomm)); 8079 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8080 PetscCall(MatDestroy(mat_n)); 8081 for (i=0;i<nis;i++) { 8082 PetscCall(ISDestroy(&isarray[i])); 8083 } 8084 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8085 PetscCall(VecDestroy(&nnsp_vec[0])); 8086 } 8087 *mat_n = NULL; 8088 } 8089 PetscFunctionReturn(0); 8090 } 8091 8092 /* temporary hack into ksp private data structure */ 8093 #include <petsc/private/kspimpl.h> 8094 8095 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8096 { 8097 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8098 PC_IS *pcis = (PC_IS*)pc->data; 8099 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8100 Mat coarsedivudotp = NULL; 8101 Mat coarseG,t_coarse_mat_is; 8102 MatNullSpace CoarseNullSpace = NULL; 8103 ISLocalToGlobalMapping coarse_islg; 8104 IS coarse_is,*isarray,corners; 8105 PetscInt i,im_active=-1,active_procs=-1; 8106 PetscInt nis,nisdofs,nisneu,nisvert; 8107 PetscInt coarse_eqs_per_proc; 8108 PC pc_temp; 8109 PCType coarse_pc_type; 8110 KSPType coarse_ksp_type; 8111 PetscBool multilevel_requested,multilevel_allowed; 8112 PetscBool coarse_reuse; 8113 PetscInt ncoarse,nedcfield; 8114 PetscBool compute_vecs = PETSC_FALSE; 8115 PetscScalar *array; 8116 MatReuse coarse_mat_reuse; 8117 PetscBool restr, full_restr, have_void; 8118 PetscMPIInt size; 8119 8120 PetscFunctionBegin; 8121 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8122 /* Assign global numbering to coarse dofs */ 8123 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8124 PetscInt ocoarse_size; 8125 compute_vecs = PETSC_TRUE; 8126 8127 pcbddc->new_primal_space = PETSC_TRUE; 8128 ocoarse_size = pcbddc->coarse_size; 8129 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8130 PetscCall(PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices)); 8131 /* see if we can avoid some work */ 8132 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8133 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8134 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8135 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8136 coarse_reuse = PETSC_FALSE; 8137 } else { /* we can safely reuse already computed coarse matrix */ 8138 coarse_reuse = PETSC_TRUE; 8139 } 8140 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8141 coarse_reuse = PETSC_FALSE; 8142 } 8143 /* reset any subassembling information */ 8144 if (!coarse_reuse || pcbddc->recompute_topography) { 8145 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8146 } 8147 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8148 coarse_reuse = PETSC_TRUE; 8149 } 8150 if (coarse_reuse && pcbddc->coarse_ksp) { 8151 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 8152 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8153 coarse_mat_reuse = MAT_REUSE_MATRIX; 8154 } else { 8155 coarse_mat = NULL; 8156 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8157 } 8158 8159 /* creates temporary l2gmap and IS for coarse indexes */ 8160 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is)); 8161 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg)); 8162 8163 /* creates temporary MATIS object for coarse matrix */ 8164 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense)); 8165 PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,coarse_islg,&t_coarse_mat_is)); 8166 PetscCall(MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense)); 8167 PetscCall(MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8168 PetscCall(MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8169 PetscCall(MatDestroy(&coarse_submat_dense)); 8170 8171 /* count "active" (i.e. with positive local size) and "void" processes */ 8172 im_active = !!(pcis->n); 8173 PetscCall(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8174 8175 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8176 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8177 /* full_restr : just use the receivers from the subassembling pattern */ 8178 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size)); 8179 coarse_mat_is = NULL; 8180 multilevel_allowed = PETSC_FALSE; 8181 multilevel_requested = PETSC_FALSE; 8182 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8183 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8184 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8185 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8186 if (multilevel_requested) { 8187 ncoarse = active_procs/pcbddc->coarsening_ratio; 8188 restr = PETSC_FALSE; 8189 full_restr = PETSC_FALSE; 8190 } else { 8191 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8192 restr = PETSC_TRUE; 8193 full_restr = PETSC_TRUE; 8194 } 8195 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8196 ncoarse = PetscMax(1,ncoarse); 8197 if (!pcbddc->coarse_subassembling) { 8198 if (pcbddc->coarsening_ratio > 1) { 8199 if (multilevel_requested) { 8200 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8201 } else { 8202 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8203 } 8204 } else { 8205 PetscMPIInt rank; 8206 8207 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank)); 8208 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8209 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling)); 8210 } 8211 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8212 PetscInt psum; 8213 if (pcbddc->coarse_ksp) psum = 1; 8214 else psum = 0; 8215 PetscCall(MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8216 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8217 } 8218 /* determine if we can go multilevel */ 8219 if (multilevel_requested) { 8220 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8221 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8222 } 8223 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8224 8225 /* dump subassembling pattern */ 8226 if (pcbddc->dbg_flag && multilevel_allowed) { 8227 PetscCall(ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer)); 8228 } 8229 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8230 nedcfield = -1; 8231 corners = NULL; 8232 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8233 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8234 const PetscInt *idxs; 8235 ISLocalToGlobalMapping tmap; 8236 8237 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8238 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap)); 8239 /* allocate space for temporary storage */ 8240 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs)); 8241 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs2)); 8242 /* allocate for IS array */ 8243 nisdofs = pcbddc->n_ISForDofsLocal; 8244 if (pcbddc->nedclocal) { 8245 if (pcbddc->nedfield > -1) { 8246 nedcfield = pcbddc->nedfield; 8247 } else { 8248 nedcfield = 0; 8249 PetscCheck(!nisdofs,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%" PetscInt_FMT ")",nisdofs); 8250 nisdofs = 1; 8251 } 8252 } 8253 nisneu = !!pcbddc->NeumannBoundariesLocal; 8254 nisvert = 0; /* nisvert is not used */ 8255 nis = nisdofs + nisneu + nisvert; 8256 PetscCall(PetscMalloc1(nis,&isarray)); 8257 /* dofs splitting */ 8258 for (i=0;i<nisdofs;i++) { 8259 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8260 if (nedcfield != i) { 8261 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize)); 8262 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8263 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8264 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8265 } else { 8266 PetscCall(ISGetLocalSize(pcbddc->nedclocal,&tsize)); 8267 PetscCall(ISGetIndices(pcbddc->nedclocal,&idxs)); 8268 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8269 PetscCheck(tsize == nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT,tsize,nout); 8270 PetscCall(ISRestoreIndices(pcbddc->nedclocal,&idxs)); 8271 } 8272 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8273 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i])); 8274 /* PetscCall(ISView(isarray[i],0)); */ 8275 } 8276 /* neumann boundaries */ 8277 if (pcbddc->NeumannBoundariesLocal) { 8278 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8279 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize)); 8280 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8281 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8282 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8283 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8284 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs])); 8285 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8286 } 8287 /* coordinates */ 8288 if (pcbddc->corner_selected) { 8289 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8290 PetscCall(ISGetLocalSize(corners,&tsize)); 8291 PetscCall(ISGetIndices(corners,&idxs)); 8292 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8293 PetscCheck(tsize == nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT,tsize,nout); 8294 PetscCall(ISRestoreIndices(corners,&idxs)); 8295 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8296 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8297 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners)); 8298 } 8299 PetscCall(PetscFree(tidxs)); 8300 PetscCall(PetscFree(tidxs2)); 8301 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8302 } else { 8303 nis = 0; 8304 nisdofs = 0; 8305 nisneu = 0; 8306 nisvert = 0; 8307 isarray = NULL; 8308 } 8309 /* destroy no longer needed map */ 8310 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8311 8312 /* subassemble */ 8313 if (multilevel_allowed) { 8314 Vec vp[1]; 8315 PetscInt nvecs = 0; 8316 PetscBool reuse,reuser; 8317 8318 if (coarse_mat) reuse = PETSC_TRUE; 8319 else reuse = PETSC_FALSE; 8320 PetscCall(MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8321 vp[0] = NULL; 8322 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8323 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&vp[0])); 8324 PetscCall(VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE)); 8325 PetscCall(VecSetType(vp[0],VECSTANDARD)); 8326 nvecs = 1; 8327 8328 if (pcbddc->divudotp) { 8329 Mat B,loc_divudotp; 8330 Vec v,p; 8331 IS dummy; 8332 PetscInt np; 8333 8334 PetscCall(MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp)); 8335 PetscCall(MatGetSize(loc_divudotp,&np,NULL)); 8336 PetscCall(ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy)); 8337 PetscCall(MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B)); 8338 PetscCall(MatCreateVecs(B,&v,&p)); 8339 PetscCall(VecSet(p,1.)); 8340 PetscCall(MatMultTranspose(B,p,v)); 8341 PetscCall(VecDestroy(&p)); 8342 PetscCall(MatDestroy(&B)); 8343 PetscCall(VecGetArray(vp[0],&array)); 8344 PetscCall(VecPlaceArray(pcbddc->vec1_P,array)); 8345 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P)); 8346 PetscCall(VecResetArray(pcbddc->vec1_P)); 8347 PetscCall(VecRestoreArray(vp[0],&array)); 8348 PetscCall(ISDestroy(&dummy)); 8349 PetscCall(VecDestroy(&v)); 8350 } 8351 } 8352 if (reuser) { 8353 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp)); 8354 } else { 8355 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp)); 8356 } 8357 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8358 PetscScalar *arraym; 8359 const PetscScalar *arrayv; 8360 PetscInt nl; 8361 PetscCall(VecGetLocalSize(vp[0],&nl)); 8362 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp)); 8363 PetscCall(MatDenseGetArray(coarsedivudotp,&arraym)); 8364 PetscCall(VecGetArrayRead(vp[0],&arrayv)); 8365 PetscCall(PetscArraycpy(arraym,arrayv,nl)); 8366 PetscCall(VecRestoreArrayRead(vp[0],&arrayv)); 8367 PetscCall(MatDenseRestoreArray(coarsedivudotp,&arraym)); 8368 PetscCall(VecDestroy(&vp[0])); 8369 } else { 8370 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp)); 8371 } 8372 } else { 8373 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL)); 8374 } 8375 if (coarse_mat_is || coarse_mat) { 8376 if (!multilevel_allowed) { 8377 PetscCall(MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat)); 8378 } else { 8379 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8380 if (coarse_mat_is) { 8381 PetscCheck(!coarse_mat,PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8382 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8383 coarse_mat = coarse_mat_is; 8384 } 8385 } 8386 } 8387 PetscCall(MatDestroy(&t_coarse_mat_is)); 8388 PetscCall(MatDestroy(&coarse_mat_is)); 8389 8390 /* create local to global scatters for coarse problem */ 8391 if (compute_vecs) { 8392 PetscInt lrows; 8393 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8394 if (coarse_mat) { 8395 PetscCall(MatGetLocalSize(coarse_mat,&lrows,NULL)); 8396 } else { 8397 lrows = 0; 8398 } 8399 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec)); 8400 PetscCall(VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE)); 8401 PetscCall(VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8402 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8403 PetscCall(VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob)); 8404 } 8405 PetscCall(ISDestroy(&coarse_is)); 8406 8407 /* set defaults for coarse KSP and PC */ 8408 if (multilevel_allowed) { 8409 coarse_ksp_type = KSPRICHARDSON; 8410 coarse_pc_type = PCBDDC; 8411 } else { 8412 coarse_ksp_type = KSPPREONLY; 8413 coarse_pc_type = PCREDUNDANT; 8414 } 8415 8416 /* print some info if requested */ 8417 if (pcbddc->dbg_flag) { 8418 if (!multilevel_allowed) { 8419 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8420 if (multilevel_requested) { 8421 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio)); 8422 } else if (pcbddc->max_levels) { 8423 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%" PetscInt_FMT ")\n",pcbddc->max_levels)); 8424 } 8425 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8426 } 8427 } 8428 8429 /* communicate coarse discrete gradient */ 8430 coarseG = NULL; 8431 if (pcbddc->nedcG && multilevel_allowed) { 8432 MPI_Comm ccomm; 8433 if (coarse_mat) { 8434 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8435 } else { 8436 ccomm = MPI_COMM_NULL; 8437 } 8438 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG)); 8439 } 8440 8441 /* create the coarse KSP object only once with defaults */ 8442 if (coarse_mat) { 8443 PetscBool isredundant,isbddc,force,valid; 8444 PetscViewer dbg_viewer = NULL; 8445 8446 if (pcbddc->dbg_flag) { 8447 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8448 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level)); 8449 } 8450 if (!pcbddc->coarse_ksp) { 8451 char prefix[256],str_level[16]; 8452 size_t len; 8453 8454 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp)); 8455 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure)); 8456 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1)); 8457 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1)); 8458 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8459 PetscCall(KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type)); 8460 PetscCall(KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE)); 8461 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8462 /* TODO is this logic correct? should check for coarse_mat type */ 8463 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8464 /* prefix */ 8465 PetscCall(PetscStrcpy(prefix,"")); 8466 PetscCall(PetscStrcpy(str_level,"")); 8467 if (!pcbddc->current_level) { 8468 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix))); 8469 PetscCall(PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix))); 8470 } else { 8471 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 8472 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8473 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8474 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8475 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1)); 8476 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 8477 PetscCall(PetscStrlcat(prefix,str_level,sizeof(prefix))); 8478 } 8479 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix)); 8480 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8481 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8482 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8483 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8484 /* allow user customization */ 8485 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8486 /* get some info after set from options */ 8487 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8488 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8489 force = PETSC_FALSE; 8490 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8491 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8492 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8493 if (multilevel_allowed && !force && !valid) { 8494 isbddc = PETSC_TRUE; 8495 PetscCall(PCSetType(pc_temp,PCBDDC)); 8496 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8497 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8498 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8499 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8500 PetscObjectOptionsBegin((PetscObject)pc_temp); 8501 PetscCall((*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp)); 8502 PetscCall(PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp)); 8503 PetscOptionsEnd(); 8504 pc_temp->setfromoptionscalled++; 8505 } 8506 } 8507 } 8508 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8509 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8510 if (nisdofs) { 8511 PetscCall(PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray)); 8512 for (i=0;i<nisdofs;i++) { 8513 PetscCall(ISDestroy(&isarray[i])); 8514 } 8515 } 8516 if (nisneu) { 8517 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs])); 8518 PetscCall(ISDestroy(&isarray[nisdofs])); 8519 } 8520 if (nisvert) { 8521 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1])); 8522 PetscCall(ISDestroy(&isarray[nis-1])); 8523 } 8524 if (coarseG) { 8525 PetscCall(PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE)); 8526 } 8527 8528 /* get some info after set from options */ 8529 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8530 8531 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8532 if (isbddc && !multilevel_allowed) { 8533 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8534 } 8535 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8536 force = PETSC_FALSE; 8537 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8538 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8539 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8540 PetscCall(PCSetType(pc_temp,PCBDDC)); 8541 } 8542 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant)); 8543 if (isredundant) { 8544 KSP inner_ksp; 8545 PC inner_pc; 8546 8547 PetscCall(PCRedundantGetKSP(pc_temp,&inner_ksp)); 8548 PetscCall(KSPGetPC(inner_ksp,&inner_pc)); 8549 } 8550 8551 /* parameters which miss an API */ 8552 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8553 if (isbddc) { 8554 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8555 8556 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8557 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8558 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8559 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8560 if (pcbddc_coarse->benign_saddle_point) { 8561 Mat coarsedivudotp_is; 8562 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8563 IS row,col; 8564 const PetscInt *gidxs; 8565 PetscInt n,st,M,N; 8566 8567 PetscCall(MatGetSize(coarsedivudotp,&n,NULL)); 8568 PetscCallMPI(MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat))); 8569 st = st-n; 8570 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row)); 8571 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL)); 8572 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap,&n)); 8573 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 8574 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col)); 8575 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 8576 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 8577 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 8578 PetscCall(ISGetSize(row,&M)); 8579 PetscCall(MatGetSize(coarse_mat,&N,NULL)); 8580 PetscCall(ISDestroy(&row)); 8581 PetscCall(ISDestroy(&col)); 8582 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is)); 8583 PetscCall(MatSetType(coarsedivudotp_is,MATIS)); 8584 PetscCall(MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N)); 8585 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g)); 8586 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8587 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8588 PetscCall(MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp)); 8589 PetscCall(MatDestroy(&coarsedivudotp)); 8590 PetscCall(PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL)); 8591 PetscCall(MatDestroy(&coarsedivudotp_is)); 8592 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8593 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8594 } 8595 } 8596 8597 /* propagate symmetry info of coarse matrix */ 8598 PetscCall(MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE)); 8599 if (pc->pmat->symmetric_set) { 8600 PetscCall(MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric)); 8601 } 8602 if (pc->pmat->hermitian_set) { 8603 PetscCall(MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian)); 8604 } 8605 if (pc->pmat->spd_set) { 8606 PetscCall(MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd)); 8607 } 8608 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8609 PetscCall(MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE)); 8610 } 8611 /* set operators */ 8612 PetscCall(MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view")); 8613 PetscCall(MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix)); 8614 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8615 if (pcbddc->dbg_flag) { 8616 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level)); 8617 } 8618 } 8619 PetscCall(MatDestroy(&coarseG)); 8620 PetscCall(PetscFree(isarray)); 8621 #if 0 8622 { 8623 PetscViewer viewer; 8624 char filename[256]; 8625 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8626 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8627 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8628 PetscCall(MatView(coarse_mat,viewer)); 8629 PetscCall(PetscViewerPopFormat(viewer)); 8630 PetscCall(PetscViewerDestroy(&viewer)); 8631 } 8632 #endif 8633 8634 if (corners) { 8635 Vec gv; 8636 IS is; 8637 const PetscInt *idxs; 8638 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8639 PetscScalar *coords; 8640 8641 PetscCheck(pcbddc->mat_graph->cloc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8642 PetscCall(VecGetSize(pcbddc->coarse_vec,&N)); 8643 PetscCall(VecGetLocalSize(pcbddc->coarse_vec,&n)); 8644 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv)); 8645 PetscCall(VecSetBlockSize(gv,cdim)); 8646 PetscCall(VecSetSizes(gv,n*cdim,N*cdim)); 8647 PetscCall(VecSetType(gv,VECSTANDARD)); 8648 PetscCall(VecSetFromOptions(gv)); 8649 PetscCall(VecSet(gv,PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8650 8651 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8652 PetscCall(ISGetLocalSize(is,&n)); 8653 PetscCall(ISGetIndices(is,&idxs)); 8654 PetscCall(PetscMalloc1(n*cdim,&coords)); 8655 for (i=0;i<n;i++) { 8656 for (d=0;d<cdim;d++) { 8657 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8658 } 8659 } 8660 PetscCall(ISRestoreIndices(is,&idxs)); 8661 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8662 8663 PetscCall(ISGetLocalSize(corners,&n)); 8664 PetscCall(ISGetIndices(corners,&idxs)); 8665 PetscCall(VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES)); 8666 PetscCall(ISRestoreIndices(corners,&idxs)); 8667 PetscCall(PetscFree(coords)); 8668 PetscCall(VecAssemblyBegin(gv)); 8669 PetscCall(VecAssemblyEnd(gv)); 8670 PetscCall(VecGetArray(gv,&coords)); 8671 if (pcbddc->coarse_ksp) { 8672 PC coarse_pc; 8673 PetscBool isbddc; 8674 8675 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 8676 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 8677 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8678 PetscReal *realcoords; 8679 8680 PetscCall(VecGetLocalSize(gv,&n)); 8681 #if defined(PETSC_USE_COMPLEX) 8682 PetscCall(PetscMalloc1(n,&realcoords)); 8683 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8684 #else 8685 realcoords = coords; 8686 #endif 8687 PetscCall(PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords)); 8688 #if defined(PETSC_USE_COMPLEX) 8689 PetscCall(PetscFree(realcoords)); 8690 #endif 8691 } 8692 } 8693 PetscCall(VecRestoreArray(gv,&coords)); 8694 PetscCall(VecDestroy(&gv)); 8695 } 8696 PetscCall(ISDestroy(&corners)); 8697 8698 if (pcbddc->coarse_ksp) { 8699 Vec crhs,csol; 8700 8701 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&csol)); 8702 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&crhs)); 8703 if (!csol) { 8704 PetscCall(MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL)); 8705 } 8706 if (!crhs) { 8707 PetscCall(MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs))); 8708 } 8709 } 8710 PetscCall(MatDestroy(&coarsedivudotp)); 8711 8712 /* compute null space for coarse solver if the benign trick has been requested */ 8713 if (pcbddc->benign_null) { 8714 8715 PetscCall(VecSet(pcbddc->vec1_P,0.)); 8716 for (i=0;i<pcbddc->benign_n;i++) { 8717 PetscCall(VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES)); 8718 } 8719 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8720 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8721 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8722 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8723 if (coarse_mat) { 8724 Vec nullv; 8725 PetscScalar *array,*array2; 8726 PetscInt nl; 8727 8728 PetscCall(MatCreateVecs(coarse_mat,&nullv,NULL)); 8729 PetscCall(VecGetLocalSize(nullv,&nl)); 8730 PetscCall(VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8731 PetscCall(VecGetArray(nullv,&array2)); 8732 PetscCall(PetscArraycpy(array2,array,nl)); 8733 PetscCall(VecRestoreArray(nullv,&array2)); 8734 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8735 PetscCall(VecNormalize(nullv,NULL)); 8736 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace)); 8737 PetscCall(VecDestroy(&nullv)); 8738 } 8739 } 8740 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8741 8742 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8743 if (pcbddc->coarse_ksp) { 8744 PetscBool ispreonly; 8745 8746 if (CoarseNullSpace) { 8747 PetscBool isnull; 8748 PetscCall(MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull)); 8749 if (isnull) { 8750 PetscCall(MatSetNullSpace(coarse_mat,CoarseNullSpace)); 8751 } 8752 /* TODO: add local nullspaces (if any) */ 8753 } 8754 /* setup coarse ksp */ 8755 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8756 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8757 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly)); 8758 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8759 KSP check_ksp; 8760 KSPType check_ksp_type; 8761 PC check_pc; 8762 Vec check_vec,coarse_vec; 8763 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8764 PetscInt its; 8765 PetscBool compute_eigs; 8766 PetscReal *eigs_r,*eigs_c; 8767 PetscInt neigs; 8768 const char *prefix; 8769 8770 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8771 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp)); 8772 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0)); 8773 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE)); 8774 PetscCall(KSPSetOperators(check_ksp,coarse_mat,coarse_mat)); 8775 PetscCall(KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size)); 8776 /* prevent from setup unneeded object */ 8777 PetscCall(KSPGetPC(check_ksp,&check_pc)); 8778 PetscCall(PCSetType(check_pc,PCNONE)); 8779 if (ispreonly) { 8780 check_ksp_type = KSPPREONLY; 8781 compute_eigs = PETSC_FALSE; 8782 } else { 8783 check_ksp_type = KSPGMRES; 8784 compute_eigs = PETSC_TRUE; 8785 } 8786 PetscCall(KSPSetType(check_ksp,check_ksp_type)); 8787 PetscCall(KSPSetComputeSingularValues(check_ksp,compute_eigs)); 8788 PetscCall(KSPSetComputeEigenvalues(check_ksp,compute_eigs)); 8789 PetscCall(KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1)); 8790 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix)); 8791 PetscCall(KSPSetOptionsPrefix(check_ksp,prefix)); 8792 PetscCall(KSPAppendOptionsPrefix(check_ksp,"check_")); 8793 PetscCall(KSPSetFromOptions(check_ksp)); 8794 PetscCall(KSPSetUp(check_ksp)); 8795 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&check_pc)); 8796 PetscCall(KSPSetPC(check_ksp,check_pc)); 8797 /* create random vec */ 8798 PetscCall(MatCreateVecs(coarse_mat,&coarse_vec,&check_vec)); 8799 PetscCall(VecSetRandom(check_vec,NULL)); 8800 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8801 /* solve coarse problem */ 8802 PetscCall(KSPSolve(check_ksp,coarse_vec,coarse_vec)); 8803 PetscCall(KSPCheckSolve(check_ksp,pc,coarse_vec)); 8804 /* set eigenvalue estimation if preonly has not been requested */ 8805 if (compute_eigs) { 8806 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_r)); 8807 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_c)); 8808 PetscCall(KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs)); 8809 if (neigs) { 8810 lambda_max = eigs_r[neigs-1]; 8811 lambda_min = eigs_r[0]; 8812 if (pcbddc->use_coarse_estimates) { 8813 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8814 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min)); 8815 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min))); 8816 } 8817 } 8818 } 8819 } 8820 8821 /* check coarse problem residual error */ 8822 if (pcbddc->dbg_flag) { 8823 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8824 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1))); 8825 PetscCall(VecAXPY(check_vec,-1.0,coarse_vec)); 8826 PetscCall(VecNorm(check_vec,NORM_INFINITY,&infty_error)); 8827 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8828 PetscCall(VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error)); 8829 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates)); 8830 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer)); 8831 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer)); 8832 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",(double)infty_error)); 8833 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",(double)abs_infty_error)); 8834 if (CoarseNullSpace) { 8835 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n")); 8836 } 8837 if (compute_eigs) { 8838 PetscReal lambda_max_s,lambda_min_s; 8839 KSPConvergedReason reason; 8840 PetscCall(KSPGetType(check_ksp,&check_ksp_type)); 8841 PetscCall(KSPGetIterationNumber(check_ksp,&its)); 8842 PetscCall(KSPGetConvergedReason(check_ksp,&reason)); 8843 PetscCall(KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s)); 8844 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,(double)lambda_min,(double)lambda_max,(double)lambda_min_s,(double)lambda_max_s)); 8845 for (i=0;i<neigs;i++) { 8846 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",(double)eigs_r[i],(double)eigs_c[i])); 8847 } 8848 } 8849 PetscCall(PetscViewerFlush(dbg_viewer)); 8850 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1))); 8851 } 8852 PetscCall(VecDestroy(&check_vec)); 8853 PetscCall(VecDestroy(&coarse_vec)); 8854 PetscCall(KSPDestroy(&check_ksp)); 8855 if (compute_eigs) { 8856 PetscCall(PetscFree(eigs_r)); 8857 PetscCall(PetscFree(eigs_c)); 8858 } 8859 } 8860 } 8861 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 8862 /* print additional info */ 8863 if (pcbddc->dbg_flag) { 8864 /* waits until all processes reaches this point */ 8865 PetscCall(PetscBarrier((PetscObject)pc)); 8866 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %" PetscInt_FMT "\n",pcbddc->current_level)); 8867 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8868 } 8869 8870 /* free memory */ 8871 PetscCall(MatDestroy(&coarse_mat)); 8872 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8873 PetscFunctionReturn(0); 8874 } 8875 8876 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8877 { 8878 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8879 PC_IS* pcis = (PC_IS*)pc->data; 8880 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8881 IS subset,subset_mult,subset_n; 8882 PetscInt local_size,coarse_size=0; 8883 PetscInt *local_primal_indices=NULL; 8884 const PetscInt *t_local_primal_indices; 8885 8886 PetscFunctionBegin; 8887 /* Compute global number of coarse dofs */ 8888 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8889 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n)); 8890 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset)); 8891 PetscCall(ISDestroy(&subset_n)); 8892 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult)); 8893 PetscCall(ISRenumber(subset,subset_mult,&coarse_size,&subset_n)); 8894 PetscCall(ISDestroy(&subset)); 8895 PetscCall(ISDestroy(&subset_mult)); 8896 PetscCall(ISGetLocalSize(subset_n,&local_size)); 8897 PetscCheck(local_size == pcbddc->local_primal_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT,local_size,pcbddc->local_primal_size); 8898 PetscCall(PetscMalloc1(local_size,&local_primal_indices)); 8899 PetscCall(ISGetIndices(subset_n,&t_local_primal_indices)); 8900 PetscCall(PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size)); 8901 PetscCall(ISRestoreIndices(subset_n,&t_local_primal_indices)); 8902 PetscCall(ISDestroy(&subset_n)); 8903 8904 /* check numbering */ 8905 if (pcbddc->dbg_flag) { 8906 PetscScalar coarsesum,*array,*array2; 8907 PetscInt i; 8908 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8909 8910 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8911 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8912 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n")); 8913 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8914 /* counter */ 8915 PetscCall(VecSet(pcis->vec1_global,0.0)); 8916 PetscCall(VecSet(pcis->vec1_N,1.0)); 8917 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8918 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8919 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8920 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8921 PetscCall(VecSet(pcis->vec1_N,0.0)); 8922 for (i=0;i<pcbddc->local_primal_size;i++) { 8923 PetscCall(VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES)); 8924 } 8925 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8926 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8927 PetscCall(VecSet(pcis->vec1_global,0.0)); 8928 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8929 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8930 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8931 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8932 PetscCall(VecGetArray(pcis->vec1_N,&array)); 8933 PetscCall(VecGetArray(pcis->vec2_N,&array2)); 8934 for (i=0;i<pcis->n;i++) { 8935 if (array[i] != 0.0 && array[i] != array2[i]) { 8936 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8937 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8938 set_error = PETSC_TRUE; 8939 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi)); 8940 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n",PetscGlobalRank,i,gi,owned,neigh)); 8941 } 8942 } 8943 PetscCall(VecRestoreArray(pcis->vec2_N,&array2)); 8944 PetscCall(MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8945 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8946 for (i=0;i<pcis->n;i++) { 8947 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8948 } 8949 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 8950 PetscCall(VecSet(pcis->vec1_global,0.0)); 8951 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8952 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8953 PetscCall(VecSum(pcis->vec1_global,&coarsesum)); 8954 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %" PetscInt_FMT " (%lf)\n",coarse_size,(double)PetscRealPart(coarsesum))); 8955 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8956 PetscInt *gidxs; 8957 8958 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&gidxs)); 8959 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs)); 8960 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n")); 8961 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8962 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank)); 8963 for (i=0;i<pcbddc->local_primal_size;i++) { 8964 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i])); 8965 } 8966 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8967 PetscCall(PetscFree(gidxs)); 8968 } 8969 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8970 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8971 PetscCheck(!set_error_reduced,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8972 } 8973 8974 /* get back data */ 8975 *coarse_size_n = coarse_size; 8976 *local_primal_indices_n = local_primal_indices; 8977 PetscFunctionReturn(0); 8978 } 8979 8980 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8981 { 8982 IS localis_t; 8983 PetscInt i,lsize,*idxs,n; 8984 PetscScalar *vals; 8985 8986 PetscFunctionBegin; 8987 /* get indices in local ordering exploiting local to global map */ 8988 PetscCall(ISGetLocalSize(globalis,&lsize)); 8989 PetscCall(PetscMalloc1(lsize,&vals)); 8990 for (i=0;i<lsize;i++) vals[i] = 1.0; 8991 PetscCall(ISGetIndices(globalis,(const PetscInt**)&idxs)); 8992 PetscCall(VecSet(gwork,0.0)); 8993 PetscCall(VecSet(lwork,0.0)); 8994 if (idxs) { /* multilevel guard */ 8995 PetscCall(VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE)); 8996 PetscCall(VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES)); 8997 } 8998 PetscCall(VecAssemblyBegin(gwork)); 8999 PetscCall(ISRestoreIndices(globalis,(const PetscInt**)&idxs)); 9000 PetscCall(PetscFree(vals)); 9001 PetscCall(VecAssemblyEnd(gwork)); 9002 /* now compute set in local ordering */ 9003 PetscCall(VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 9004 PetscCall(VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 9005 PetscCall(VecGetArrayRead(lwork,(const PetscScalar**)&vals)); 9006 PetscCall(VecGetSize(lwork,&n)); 9007 for (i=0,lsize=0;i<n;i++) { 9008 if (PetscRealPart(vals[i]) > 0.5) { 9009 lsize++; 9010 } 9011 } 9012 PetscCall(PetscMalloc1(lsize,&idxs)); 9013 for (i=0,lsize=0;i<n;i++) { 9014 if (PetscRealPart(vals[i]) > 0.5) { 9015 idxs[lsize++] = i; 9016 } 9017 } 9018 PetscCall(VecRestoreArrayRead(lwork,(const PetscScalar**)&vals)); 9019 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t)); 9020 *localis = localis_t; 9021 PetscFunctionReturn(0); 9022 } 9023 9024 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9025 { 9026 PC_IS *pcis=(PC_IS*)pc->data; 9027 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9028 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9029 Mat S_j; 9030 PetscInt *used_xadj,*used_adjncy; 9031 PetscBool free_used_adj; 9032 9033 PetscFunctionBegin; 9034 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9035 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9036 free_used_adj = PETSC_FALSE; 9037 if (pcbddc->sub_schurs_layers == -1) { 9038 used_xadj = NULL; 9039 used_adjncy = NULL; 9040 } else { 9041 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9042 used_xadj = pcbddc->mat_graph->xadj; 9043 used_adjncy = pcbddc->mat_graph->adjncy; 9044 } else if (pcbddc->computed_rowadj) { 9045 used_xadj = pcbddc->mat_graph->xadj; 9046 used_adjncy = pcbddc->mat_graph->adjncy; 9047 } else { 9048 PetscBool flg_row=PETSC_FALSE; 9049 const PetscInt *xadj,*adjncy; 9050 PetscInt nvtxs; 9051 9052 PetscCall(MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9053 if (flg_row) { 9054 PetscCall(PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy)); 9055 PetscCall(PetscArraycpy(used_xadj,xadj,nvtxs+1)); 9056 PetscCall(PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs])); 9057 free_used_adj = PETSC_TRUE; 9058 } else { 9059 pcbddc->sub_schurs_layers = -1; 9060 used_xadj = NULL; 9061 used_adjncy = NULL; 9062 } 9063 PetscCall(MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9064 } 9065 } 9066 9067 /* setup sub_schurs data */ 9068 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9069 if (!sub_schurs->schur_explicit) { 9070 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9071 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9072 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL)); 9073 } else { 9074 Mat change = NULL; 9075 Vec scaling = NULL; 9076 IS change_primal = NULL, iP; 9077 PetscInt benign_n; 9078 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9079 PetscBool need_change = PETSC_FALSE; 9080 PetscBool discrete_harmonic = PETSC_FALSE; 9081 9082 if (!pcbddc->use_vertices && reuse_solvers) { 9083 PetscInt n_vertices; 9084 9085 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 9086 reuse_solvers = (PetscBool)!n_vertices; 9087 } 9088 if (!pcbddc->benign_change_explicit) { 9089 benign_n = pcbddc->benign_n; 9090 } else { 9091 benign_n = 0; 9092 } 9093 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9094 We need a global reduction to avoid possible deadlocks. 9095 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9096 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9097 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9098 PetscCall(MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 9099 need_change = (PetscBool)(!need_change); 9100 } 9101 /* If the user defines additional constraints, we import them here. 9102 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 9103 if (need_change) { 9104 PC_IS *pcisf; 9105 PC_BDDC *pcbddcf; 9106 PC pcf; 9107 9108 PetscCheck(!pcbddc->sub_schurs_rebuild,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9109 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc),&pcf)); 9110 PetscCall(PCSetOperators(pcf,pc->mat,pc->pmat)); 9111 PetscCall(PCSetType(pcf,PCBDDC)); 9112 9113 /* hacks */ 9114 pcisf = (PC_IS*)pcf->data; 9115 pcisf->is_B_local = pcis->is_B_local; 9116 pcisf->vec1_N = pcis->vec1_N; 9117 pcisf->BtoNmap = pcis->BtoNmap; 9118 pcisf->n = pcis->n; 9119 pcisf->n_B = pcis->n_B; 9120 pcbddcf = (PC_BDDC*)pcf->data; 9121 PetscCall(PetscFree(pcbddcf->mat_graph)); 9122 pcbddcf->mat_graph = pcbddc->mat_graph; 9123 pcbddcf->use_faces = PETSC_TRUE; 9124 pcbddcf->use_change_of_basis = PETSC_TRUE; 9125 pcbddcf->use_change_on_faces = PETSC_TRUE; 9126 pcbddcf->use_qr_single = PETSC_TRUE; 9127 pcbddcf->fake_change = PETSC_TRUE; 9128 9129 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 9130 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9131 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 9132 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal)); 9133 change = pcbddcf->ConstraintMatrix; 9134 pcbddcf->ConstraintMatrix = NULL; 9135 9136 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 9137 PetscCall(PetscFree(pcbddcf->sub_schurs)); 9138 PetscCall(MatNullSpaceDestroy(&pcbddcf->onearnullspace)); 9139 PetscCall(PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult)); 9140 PetscCall(PetscFree(pcbddcf->primal_indices_local_idxs)); 9141 PetscCall(PetscFree(pcbddcf->onearnullvecs_state)); 9142 PetscCall(PetscFree(pcf->data)); 9143 pcf->ops->destroy = NULL; 9144 pcf->ops->reset = NULL; 9145 PetscCall(PCDestroy(&pcf)); 9146 } 9147 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9148 9149 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP)); 9150 if (iP) { 9151 PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC"); 9152 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL)); 9153 PetscOptionsEnd(); 9154 } 9155 if (discrete_harmonic) { 9156 Mat A; 9157 PetscCall(MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A)); 9158 PetscCall(MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL)); 9159 PetscCall(PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP)); 9160 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9161 PetscCall(MatDestroy(&A)); 9162 } else { 9163 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9164 } 9165 PetscCall(MatDestroy(&change)); 9166 PetscCall(ISDestroy(&change_primal)); 9167 } 9168 PetscCall(MatDestroy(&S_j)); 9169 9170 /* free adjacency */ 9171 if (free_used_adj) { 9172 PetscCall(PetscFree2(used_xadj,used_adjncy)); 9173 } 9174 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9175 PetscFunctionReturn(0); 9176 } 9177 9178 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9179 { 9180 PC_IS *pcis=(PC_IS*)pc->data; 9181 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9182 PCBDDCGraph graph; 9183 9184 PetscFunctionBegin; 9185 /* attach interface graph for determining subsets */ 9186 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9187 IS verticesIS,verticescomm; 9188 PetscInt vsize,*idxs; 9189 9190 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9191 PetscCall(ISGetSize(verticesIS,&vsize)); 9192 PetscCall(ISGetIndices(verticesIS,(const PetscInt**)&idxs)); 9193 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm)); 9194 PetscCall(ISRestoreIndices(verticesIS,(const PetscInt**)&idxs)); 9195 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9196 PetscCall(PCBDDCGraphCreate(&graph)); 9197 PetscCall(PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount)); 9198 PetscCall(PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm)); 9199 PetscCall(ISDestroy(&verticescomm)); 9200 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9201 } else { 9202 graph = pcbddc->mat_graph; 9203 } 9204 /* print some info */ 9205 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9206 IS vertices; 9207 PetscInt nv,nedges,nfaces; 9208 PetscCall(PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 9209 PetscCall(PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9210 PetscCall(ISGetSize(vertices,&nv)); 9211 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9212 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 9213 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices)); 9214 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges)); 9215 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces)); 9216 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9217 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9218 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9219 } 9220 9221 /* sub_schurs init */ 9222 if (!pcbddc->sub_schurs) { 9223 PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9224 } 9225 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild)); 9226 9227 /* free graph struct */ 9228 if (pcbddc->sub_schurs_rebuild) { 9229 PetscCall(PCBDDCGraphDestroy(&graph)); 9230 } 9231 PetscFunctionReturn(0); 9232 } 9233 9234 PetscErrorCode PCBDDCCheckOperator(PC pc) 9235 { 9236 PC_IS *pcis=(PC_IS*)pc->data; 9237 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9238 9239 PetscFunctionBegin; 9240 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9241 IS zerodiag = NULL; 9242 Mat S_j,B0_B=NULL; 9243 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9244 PetscScalar *p0_check,*array,*array2; 9245 PetscReal norm; 9246 PetscInt i; 9247 9248 /* B0 and B0_B */ 9249 if (zerodiag) { 9250 IS dummy; 9251 9252 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy)); 9253 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 9254 PetscCall(MatCreateVecs(B0_B,NULL,&dummy_vec)); 9255 PetscCall(ISDestroy(&dummy)); 9256 } 9257 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9258 PetscCall(VecDuplicate(pcbddc->vec1_P,&vec_scale_P)); 9259 PetscCall(VecSet(pcbddc->vec1_P,1.0)); 9260 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9261 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9262 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9263 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9264 PetscCall(VecReciprocal(vec_scale_P)); 9265 /* S_j */ 9266 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9267 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9268 9269 /* mimic vector in \widetilde{W}_\Gamma */ 9270 PetscCall(VecSetRandom(pcis->vec1_N,NULL)); 9271 /* continuous in primal space */ 9272 PetscCall(VecSetRandom(pcbddc->coarse_vec,NULL)); 9273 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9274 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9275 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9276 PetscCall(PetscCalloc1(pcbddc->benign_n,&p0_check)); 9277 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9278 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9279 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9280 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9281 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9282 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9283 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9284 PetscCall(VecDuplicate(pcis->vec2_B,&vec_check_B)); 9285 PetscCall(VecCopy(pcis->vec2_B,vec_check_B)); 9286 9287 /* assemble rhs for coarse problem */ 9288 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9289 /* local with Schur */ 9290 PetscCall(MatMult(S_j,pcis->vec2_B,pcis->vec1_B)); 9291 if (zerodiag) { 9292 PetscCall(VecGetArray(dummy_vec,&array)); 9293 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9294 PetscCall(VecRestoreArray(dummy_vec,&array)); 9295 PetscCall(MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B)); 9296 } 9297 /* sum on primal nodes the local contributions */ 9298 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9299 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9300 PetscCall(VecGetArray(pcis->vec1_N,&array)); 9301 PetscCall(VecGetArray(pcbddc->vec1_P,&array2)); 9302 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9303 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array2)); 9304 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 9305 PetscCall(VecSet(pcbddc->coarse_vec,0.)); 9306 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9307 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9308 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9309 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9310 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9311 /* scale primal nodes (BDDC sums contibutions) */ 9312 PetscCall(VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P)); 9313 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9314 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9315 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9316 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9317 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9318 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9319 /* global: \widetilde{B0}_B w_\Gamma */ 9320 if (zerodiag) { 9321 PetscCall(MatMult(B0_B,pcis->vec2_B,dummy_vec)); 9322 PetscCall(VecGetArray(dummy_vec,&array)); 9323 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9324 PetscCall(VecRestoreArray(dummy_vec,&array)); 9325 } 9326 /* BDDC */ 9327 PetscCall(VecSet(pcis->vec1_D,0.)); 9328 PetscCall(PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE)); 9329 9330 PetscCall(VecCopy(pcis->vec1_B,pcis->vec2_B)); 9331 PetscCall(VecAXPY(pcis->vec1_B,-1.0,vec_check_B)); 9332 PetscCall(VecNorm(pcis->vec1_B,NORM_INFINITY,&norm)); 9333 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,(double)norm)); 9334 for (i=0;i<pcbddc->benign_n;i++) { 9335 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%" PetscInt_FMT "] error is %1.4e\n",PetscGlobalRank,i,(double)PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]))); 9336 } 9337 PetscCall(PetscFree(p0_check)); 9338 PetscCall(VecDestroy(&vec_scale_P)); 9339 PetscCall(VecDestroy(&vec_check_B)); 9340 PetscCall(VecDestroy(&dummy_vec)); 9341 PetscCall(MatDestroy(&S_j)); 9342 PetscCall(MatDestroy(&B0_B)); 9343 } 9344 PetscFunctionReturn(0); 9345 } 9346 9347 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9348 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9349 { 9350 Mat At; 9351 IS rows; 9352 PetscInt rst,ren; 9353 PetscLayout rmap; 9354 9355 PetscFunctionBegin; 9356 rst = ren = 0; 9357 if (ccomm != MPI_COMM_NULL) { 9358 PetscCall(PetscLayoutCreate(ccomm,&rmap)); 9359 PetscCall(PetscLayoutSetSize(rmap,A->rmap->N)); 9360 PetscCall(PetscLayoutSetBlockSize(rmap,1)); 9361 PetscCall(PetscLayoutSetUp(rmap)); 9362 PetscCall(PetscLayoutGetRange(rmap,&rst,&ren)); 9363 } 9364 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows)); 9365 PetscCall(MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At)); 9366 PetscCall(ISDestroy(&rows)); 9367 9368 if (ccomm != MPI_COMM_NULL) { 9369 Mat_MPIAIJ *a,*b; 9370 IS from,to; 9371 Vec gvec; 9372 PetscInt lsize; 9373 9374 PetscCall(MatCreate(ccomm,B)); 9375 PetscCall(MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N)); 9376 PetscCall(MatSetType(*B,MATAIJ)); 9377 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 9378 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9379 a = (Mat_MPIAIJ*)At->data; 9380 b = (Mat_MPIAIJ*)(*B)->data; 9381 PetscCallMPI(MPI_Comm_size(ccomm,&b->size)); 9382 PetscCallMPI(MPI_Comm_rank(ccomm,&b->rank)); 9383 PetscCall(PetscObjectReference((PetscObject)a->A)); 9384 PetscCall(PetscObjectReference((PetscObject)a->B)); 9385 b->A = a->A; 9386 b->B = a->B; 9387 9388 b->donotstash = a->donotstash; 9389 b->roworiented = a->roworiented; 9390 b->rowindices = NULL; 9391 b->rowvalues = NULL; 9392 b->getrowactive = PETSC_FALSE; 9393 9394 (*B)->rmap = rmap; 9395 (*B)->factortype = A->factortype; 9396 (*B)->assembled = PETSC_TRUE; 9397 (*B)->insertmode = NOT_SET_VALUES; 9398 (*B)->preallocated = PETSC_TRUE; 9399 9400 if (a->colmap) { 9401 #if defined(PETSC_USE_CTABLE) 9402 PetscCall(PetscTableCreateCopy(a->colmap,&b->colmap)); 9403 #else 9404 PetscCall(PetscMalloc1(At->cmap->N,&b->colmap)); 9405 PetscCall(PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt))); 9406 PetscCall(PetscArraycpy(b->colmap,a->colmap,At->cmap->N)); 9407 #endif 9408 } else b->colmap = NULL; 9409 if (a->garray) { 9410 PetscInt len; 9411 len = a->B->cmap->n; 9412 PetscCall(PetscMalloc1(len+1,&b->garray)); 9413 PetscCall(PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt))); 9414 if (len) PetscCall(PetscArraycpy(b->garray,a->garray,len)); 9415 } else b->garray = NULL; 9416 9417 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9418 b->lvec = a->lvec; 9419 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec)); 9420 9421 /* cannot use VecScatterCopy */ 9422 PetscCall(VecGetLocalSize(b->lvec,&lsize)); 9423 PetscCall(ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from)); 9424 PetscCall(ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to)); 9425 PetscCall(MatCreateVecs(*B,&gvec,NULL)); 9426 PetscCall(VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx)); 9427 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx)); 9428 PetscCall(ISDestroy(&from)); 9429 PetscCall(ISDestroy(&to)); 9430 PetscCall(VecDestroy(&gvec)); 9431 } 9432 PetscCall(MatDestroy(&At)); 9433 PetscFunctionReturn(0); 9434 } 9435