1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 #if defined(PETSC_USE_COMPLEX) 22 PetscReal *rwork2; 23 #endif 24 25 PetscFunctionBegin; 26 PetscCall(MatGetSize(A,&nr,&nc)); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 PetscCall(PetscMalloc1(ulw,&uwork)); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 PetscCall(PetscMalloc1(n,&sing)); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 PetscCall(PetscMalloc1(nr*nr,&U)); 46 PetscCall(PetscBLASIntCast(nr,&bM)); 47 PetscCall(PetscBLASIntCast(nc,&bN)); 48 PetscCall(PetscBLASIntCast(ulw,&lwork)); 49 PetscCall(MatDenseGetArray(A,&data)); 50 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 #if !defined(PETSC_USE_COMPLEX) 52 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 53 #else 54 PetscCall(PetscMalloc1(5*n,&rwork2)); 55 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr)); 56 PetscCall(PetscFree(rwork2)); 57 #endif 58 PetscCall(PetscFPTrapPop()); 59 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 60 PetscCall(MatDenseRestoreArray(A,&data)); 61 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 62 if (!rwork) { 63 PetscCall(PetscFree(sing)); 64 } 65 if (!work) { 66 PetscCall(PetscFree(uwork)); 67 } 68 /* create B */ 69 if (!range) { 70 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B)); 71 PetscCall(MatDenseGetArray(*B,&data)); 72 PetscCall(PetscArraycpy(data,U+nr*i,(nr-i)*nr)); 73 } else { 74 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B)); 75 PetscCall(MatDenseGetArray(*B,&data)); 76 PetscCall(PetscArraycpy(data,U,i*nr)); 77 } 78 PetscCall(MatDenseRestoreArray(*B,&data)); 79 PetscCall(PetscFree(U)); 80 PetscFunctionReturn(0); 81 } 82 83 /* TODO REMOVE */ 84 #if defined(PRINT_GDET) 85 static int inc = 0; 86 static int lev = 0; 87 #endif 88 89 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 90 { 91 Mat GE,GEd; 92 PetscInt rsize,csize,esize; 93 PetscScalar *ptr; 94 95 PetscFunctionBegin; 96 PetscCall(ISGetSize(edge,&esize)); 97 if (!esize) PetscFunctionReturn(0); 98 PetscCall(ISGetSize(extrow,&rsize)); 99 PetscCall(ISGetSize(extcol,&csize)); 100 101 /* gradients */ 102 ptr = work + 5*esize; 103 PetscCall(MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE)); 104 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins)); 105 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins)); 106 PetscCall(MatDestroy(&GE)); 107 108 /* constants */ 109 ptr += rsize*csize; 110 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd)); 111 PetscCall(MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE)); 112 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd)); 113 PetscCall(MatDestroy(&GE)); 114 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins)); 115 PetscCall(MatDestroy(&GEd)); 116 117 if (corners) { 118 Mat GEc; 119 const PetscScalar *vals; 120 PetscScalar v; 121 122 PetscCall(MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc)); 123 PetscCall(MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd)); 124 PetscCall(MatDenseGetArrayRead(GEd,&vals)); 125 /* v = PetscAbsScalar(vals[0]) */; 126 v = 1.; 127 cvals[0] = vals[0]/v; 128 cvals[1] = vals[1]/v; 129 PetscCall(MatDenseRestoreArrayRead(GEd,&vals)); 130 PetscCall(MatScale(*GKins,1./v)); 131 #if defined(PRINT_GDET) 132 { 133 PetscViewer viewer; 134 char filename[256]; 135 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 136 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 137 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 138 PetscCall(PetscObjectSetName((PetscObject)GEc,"GEc")); 139 PetscCall(MatView(GEc,viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)(*GKins),"GK")); 141 PetscCall(MatView(*GKins,viewer)); 142 PetscCall(PetscObjectSetName((PetscObject)GEd,"Gproj")); 143 PetscCall(MatView(GEd,viewer)); 144 PetscCall(PetscViewerDestroy(&viewer)); 145 } 146 #endif 147 PetscCall(MatDestroy(&GEd)); 148 PetscCall(MatDestroy(&GEc)); 149 } 150 151 PetscFunctionReturn(0); 152 } 153 154 PetscErrorCode PCBDDCNedelecSupport(PC pc) 155 { 156 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 157 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 158 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 159 Vec tvec; 160 PetscSF sfv; 161 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 162 MPI_Comm comm; 163 IS lned,primals,allprimals,nedfieldlocal; 164 IS *eedges,*extrows,*extcols,*alleedges; 165 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 166 PetscScalar *vals,*work; 167 PetscReal *rwork; 168 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 169 PetscInt ne,nv,Lv,order,n,field; 170 PetscInt n_neigh,*neigh,*n_shared,**shared; 171 PetscInt i,j,extmem,cum,maxsize,nee; 172 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 173 PetscInt *sfvleaves,*sfvroots; 174 PetscInt *corners,*cedges; 175 PetscInt *ecount,**eneighs,*vcount,**vneighs; 176 PetscInt *emarks; 177 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 178 179 PetscFunctionBegin; 180 /* If the discrete gradient is defined for a subset of dofs and global is true, 181 it assumes G is given in global ordering for all the dofs. 182 Otherwise, the ordering is global for the Nedelec field */ 183 order = pcbddc->nedorder; 184 conforming = pcbddc->conforming; 185 field = pcbddc->nedfield; 186 global = pcbddc->nedglobal; 187 setprimal = PETSC_FALSE; 188 print = PETSC_FALSE; 189 singular = PETSC_FALSE; 190 191 /* Command line customization */ 192 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC"); 193 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL)); 194 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL)); 195 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL)); 196 /* print debug info TODO: to be removed */ 197 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL)); 198 PetscOptionsEnd(); 199 200 /* Return if there are no edges in the decomposition and the problem is not singular */ 201 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&al2g,NULL)); 202 PetscCall(ISLocalToGlobalMappingGetSize(al2g,&n)); 203 PetscCall(PetscObjectGetComm((PetscObject)pc,&comm)); 204 if (!singular) { 205 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 206 lrc[0] = PETSC_FALSE; 207 for (i=0;i<n;i++) { 208 if (PetscRealPart(vals[i]) > 2.) { 209 lrc[0] = PETSC_TRUE; 210 break; 211 } 212 } 213 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 214 PetscCall(MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm)); 215 if (!lrc[1]) PetscFunctionReturn(0); 216 } 217 218 /* Get Nedelec field */ 219 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal,comm,PETSC_ERR_USER,"Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT,field,pcbddc->n_ISForDofsLocal); 220 if (pcbddc->n_ISForDofsLocal && field >= 0) { 221 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 222 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 223 PetscCall(ISGetLocalSize(nedfieldlocal,&ne)); 224 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 225 ne = n; 226 nedfieldlocal = NULL; 227 global = PETSC_TRUE; 228 } else if (field == PETSC_DECIDE) { 229 PetscInt rst,ren,*idx; 230 231 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 232 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 233 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren)); 234 for (i=rst;i<ren;i++) { 235 PetscInt nc; 236 237 PetscCall(MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 238 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 239 PetscCall(MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 240 } 241 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 242 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 243 PetscCall(PetscMalloc1(n,&idx)); 244 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 245 PetscCall(ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal)); 246 } else { 247 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 248 } 249 250 /* Sanity checks */ 251 PetscCheck(order || conforming,comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 252 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix,comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 253 PetscCheck(!order || (ne%order == 0),PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT,ne,order); 254 255 /* Just set primal dofs and return */ 256 if (setprimal) { 257 IS enedfieldlocal; 258 PetscInt *eidxs; 259 260 PetscCall(PetscMalloc1(ne,&eidxs)); 261 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 262 if (nedfieldlocal) { 263 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 264 for (i=0,cum=0;i<ne;i++) { 265 if (PetscRealPart(vals[idxs[i]]) > 2.) { 266 eidxs[cum++] = idxs[i]; 267 } 268 } 269 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 270 } else { 271 for (i=0,cum=0;i<ne;i++) { 272 if (PetscRealPart(vals[i]) > 2.) { 273 eidxs[cum++] = i; 274 } 275 } 276 } 277 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 278 PetscCall(ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal)); 279 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal)); 280 PetscCall(PetscFree(eidxs)); 281 PetscCall(ISDestroy(&nedfieldlocal)); 282 PetscCall(ISDestroy(&enedfieldlocal)); 283 PetscFunctionReturn(0); 284 } 285 286 /* Compute some l2g maps */ 287 if (nedfieldlocal) { 288 IS is; 289 290 /* need to map from the local Nedelec field to local numbering */ 291 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g)); 292 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 293 PetscCall(ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is)); 294 PetscCall(ISLocalToGlobalMappingCreateIS(is,&al2g)); 295 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 296 if (global) { 297 PetscCall(PetscObjectReference((PetscObject)al2g)); 298 el2g = al2g; 299 } else { 300 IS gis; 301 302 PetscCall(ISRenumber(is,NULL,NULL,&gis)); 303 PetscCall(ISLocalToGlobalMappingCreateIS(gis,&el2g)); 304 PetscCall(ISDestroy(&gis)); 305 } 306 PetscCall(ISDestroy(&is)); 307 } else { 308 /* restore default */ 309 pcbddc->nedfield = -1; 310 /* one ref for the destruction of al2g, one for el2g */ 311 PetscCall(PetscObjectReference((PetscObject)al2g)); 312 PetscCall(PetscObjectReference((PetscObject)al2g)); 313 el2g = al2g; 314 fl2g = NULL; 315 } 316 317 /* Start communication to drop connections for interior edges (for cc analysis only) */ 318 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 319 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 320 if (nedfieldlocal) { 321 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 322 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 323 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 324 } else { 325 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 326 } 327 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 328 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 329 330 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 331 PetscCall(MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G)); 332 PetscCall(MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 333 if (global) { 334 PetscInt rst; 335 336 PetscCall(MatGetOwnershipRange(G,&rst,NULL)); 337 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 338 if (matis->sf_rootdata[i] < 2) { 339 matis->sf_rootdata[cum++] = i + rst; 340 } 341 } 342 PetscCall(MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE)); 343 PetscCall(MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL)); 344 } else { 345 PetscInt *tbz; 346 347 PetscCall(PetscMalloc1(ne,&tbz)); 348 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 349 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 350 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 351 for (i=0,cum=0;i<ne;i++) 352 if (matis->sf_leafdata[idxs[i]] == 1) 353 tbz[cum++] = i; 354 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 355 PetscCall(ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz)); 356 PetscCall(MatZeroRows(G,cum,tbz,0.,NULL,NULL)); 357 PetscCall(PetscFree(tbz)); 358 } 359 } else { /* we need the entire G to infer the nullspace */ 360 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 361 G = pcbddc->discretegradient; 362 } 363 364 /* Extract subdomain relevant rows of G */ 365 PetscCall(ISLocalToGlobalMappingGetIndices(el2g,&idxs)); 366 PetscCall(ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned)); 367 PetscCall(MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall)); 368 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g,&idxs)); 369 PetscCall(ISDestroy(&lned)); 370 PetscCall(MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis)); 371 PetscCall(MatDestroy(&lGall)); 372 PetscCall(MatISGetLocalMat(lGis,&lG)); 373 374 /* SF for nodal dofs communications */ 375 PetscCall(MatGetLocalSize(G,NULL,&Lv)); 376 PetscCall(MatISGetLocalToGlobalMapping(lGis,NULL,&vl2g)); 377 PetscCall(PetscObjectReference((PetscObject)vl2g)); 378 PetscCall(ISLocalToGlobalMappingGetSize(vl2g,&nv)); 379 PetscCall(PetscSFCreate(comm,&sfv)); 380 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g,&idxs)); 381 PetscCall(PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs)); 382 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs)); 383 i = singular ? 2 : 1; 384 PetscCall(PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots)); 385 386 /* Destroy temporary G created in MATIS format and modified G */ 387 PetscCall(PetscObjectReference((PetscObject)lG)); 388 PetscCall(MatDestroy(&lGis)); 389 PetscCall(MatDestroy(&G)); 390 391 if (print) { 392 PetscCall(PetscObjectSetName((PetscObject)lG,"initial_lG")); 393 PetscCall(MatView(lG,NULL)); 394 } 395 396 /* Save lG for values insertion in change of basis */ 397 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGinit)); 398 399 /* Analyze the edge-nodes connections (duplicate lG) */ 400 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGe)); 401 PetscCall(MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 402 PetscCall(PetscBTCreate(nv,&btv)); 403 PetscCall(PetscBTCreate(ne,&bte)); 404 PetscCall(PetscBTCreate(ne,&btb)); 405 PetscCall(PetscBTCreate(ne,&btbd)); 406 PetscCall(PetscBTCreate(nv,&btvcand)); 407 /* need to import the boundary specification to ensure the 408 proper detection of coarse edges' endpoints */ 409 if (pcbddc->DirichletBoundariesLocal) { 410 IS is; 411 412 if (fl2g) { 413 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is)); 414 } else { 415 is = pcbddc->DirichletBoundariesLocal; 416 } 417 PetscCall(ISGetLocalSize(is,&cum)); 418 PetscCall(ISGetIndices(is,&idxs)); 419 for (i=0;i<cum;i++) { 420 if (idxs[i] >= 0) { 421 PetscCall(PetscBTSet(btb,idxs[i])); 422 PetscCall(PetscBTSet(btbd,idxs[i])); 423 } 424 } 425 PetscCall(ISRestoreIndices(is,&idxs)); 426 if (fl2g) { 427 PetscCall(ISDestroy(&is)); 428 } 429 } 430 if (pcbddc->NeumannBoundariesLocal) { 431 IS is; 432 433 if (fl2g) { 434 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is)); 435 } else { 436 is = pcbddc->NeumannBoundariesLocal; 437 } 438 PetscCall(ISGetLocalSize(is,&cum)); 439 PetscCall(ISGetIndices(is,&idxs)); 440 for (i=0;i<cum;i++) { 441 if (idxs[i] >= 0) { 442 PetscCall(PetscBTSet(btb,idxs[i])); 443 } 444 } 445 PetscCall(ISRestoreIndices(is,&idxs)); 446 if (fl2g) { 447 PetscCall(ISDestroy(&is)); 448 } 449 } 450 451 /* Count neighs per dof */ 452 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs)); 453 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs)); 454 455 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 456 for proper detection of coarse edges' endpoints */ 457 PetscCall(PetscBTCreate(ne,&btee)); 458 for (i=0;i<ne;i++) { 459 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 460 PetscCall(PetscBTSet(btee,i)); 461 } 462 } 463 PetscCall(PetscMalloc1(ne,&marks)); 464 if (!conforming) { 465 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 466 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 467 } 468 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 469 PetscCall(MatSeqAIJGetArray(lGe,&vals)); 470 cum = 0; 471 for (i=0;i<ne;i++) { 472 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 473 if (!PetscBTLookup(btee,i)) { 474 marks[cum++] = i; 475 continue; 476 } 477 /* set badly connected edge dofs as primal */ 478 if (!conforming) { 479 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 480 marks[cum++] = i; 481 PetscCall(PetscBTSet(bte,i)); 482 for (j=ii[i];j<ii[i+1];j++) { 483 PetscCall(PetscBTSet(btv,jj[j])); 484 } 485 } else { 486 /* every edge dofs should be connected trough a certain number of nodal dofs 487 to other edge dofs belonging to coarse edges 488 - at most 2 endpoints 489 - order-1 interior nodal dofs 490 - no undefined nodal dofs (nconn < order) 491 */ 492 PetscInt ends = 0,ints = 0, undef = 0; 493 for (j=ii[i];j<ii[i+1];j++) { 494 PetscInt v = jj[j],k; 495 PetscInt nconn = iit[v+1]-iit[v]; 496 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 497 if (nconn > order) ends++; 498 else if (nconn == order) ints++; 499 else undef++; 500 } 501 if (undef || ends > 2 || ints != order -1) { 502 marks[cum++] = i; 503 PetscCall(PetscBTSet(bte,i)); 504 for (j=ii[i];j<ii[i+1];j++) { 505 PetscCall(PetscBTSet(btv,jj[j])); 506 } 507 } 508 } 509 } 510 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 511 if (!order && ii[i+1] != ii[i]) { 512 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 513 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 514 } 515 } 516 PetscCall(PetscBTDestroy(&btee)); 517 PetscCall(MatSeqAIJRestoreArray(lGe,&vals)); 518 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 519 if (!conforming) { 520 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 521 PetscCall(MatDestroy(&lGt)); 522 } 523 PetscCall(MatZeroRows(lGe,cum,marks,0.,NULL,NULL)); 524 525 /* identify splitpoints and corner candidates */ 526 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 527 if (print) { 528 PetscCall(PetscObjectSetName((PetscObject)lGe,"edgerestr_lG")); 529 PetscCall(MatView(lGe,NULL)); 530 PetscCall(PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt")); 531 PetscCall(MatView(lGt,NULL)); 532 } 533 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 534 PetscCall(MatSeqAIJGetArray(lGt,&vals)); 535 for (i=0;i<nv;i++) { 536 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 537 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 538 if (!order) { /* variable order */ 539 PetscReal vorder = 0.; 540 541 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 542 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 543 PetscCheck(vorder-test <= PETSC_SQRT_MACHINE_EPSILON,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%" PetscInt_FMT ")",(double)vorder,test); 544 ord = 1; 545 } 546 PetscAssert(test%ord == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT,test,i,ord); 547 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 548 if (PetscBTLookup(btbd,jj[j])) { 549 bdir = PETSC_TRUE; 550 break; 551 } 552 if (vc != ecount[jj[j]]) { 553 sneighs = PETSC_FALSE; 554 } else { 555 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 556 for (k=0;k<vc;k++) { 557 if (vn[k] != en[k]) { 558 sneighs = PETSC_FALSE; 559 break; 560 } 561 } 562 } 563 } 564 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 565 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n",i,PetscBools[!sneighs],PetscBools[test >= 3*ord],PetscBools[bdir]); 566 PetscCall(PetscBTSet(btv,i)); 567 } else if (test == ord) { 568 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 569 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %" PetscInt_FMT "\n",i); 570 PetscCall(PetscBTSet(btv,i)); 571 } else { 572 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %" PetscInt_FMT "\n",i); 573 PetscCall(PetscBTSet(btvcand,i)); 574 } 575 } 576 } 577 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs)); 578 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs)); 579 PetscCall(PetscBTDestroy(&btbd)); 580 581 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 582 if (order != 1) { 583 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 584 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 585 for (i=0;i<nv;i++) { 586 if (PetscBTLookup(btvcand,i)) { 587 PetscBool found = PETSC_FALSE; 588 for (j=ii[i];j<ii[i+1] && !found;j++) { 589 PetscInt k,e = jj[j]; 590 if (PetscBTLookup(bte,e)) continue; 591 for (k=iit[e];k<iit[e+1];k++) { 592 PetscInt v = jjt[k]; 593 if (v != i && PetscBTLookup(btvcand,v)) { 594 found = PETSC_TRUE; 595 break; 596 } 597 } 598 } 599 if (!found) { 600 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %" PetscInt_FMT " CLEARED\n",i); 601 PetscCall(PetscBTClear(btvcand,i)); 602 } else { 603 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %" PetscInt_FMT " ACCEPTED\n",i); 604 } 605 } 606 } 607 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 608 } 609 PetscCall(MatSeqAIJRestoreArray(lGt,&vals)); 610 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 611 PetscCall(MatDestroy(&lGe)); 612 613 /* Get the local G^T explicitly */ 614 PetscCall(MatDestroy(&lGt)); 615 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 616 PetscCall(MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 617 618 /* Mark interior nodal dofs */ 619 PetscCall(ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 620 PetscCall(PetscBTCreate(nv,&btvi)); 621 for (i=1;i<n_neigh;i++) { 622 for (j=0;j<n_shared[i];j++) { 623 PetscCall(PetscBTSet(btvi,shared[i][j])); 624 } 625 } 626 PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 627 628 /* communicate corners and splitpoints */ 629 PetscCall(PetscMalloc1(nv,&vmarks)); 630 PetscCall(PetscArrayzero(sfvleaves,nv)); 631 PetscCall(PetscArrayzero(sfvroots,Lv)); 632 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 633 634 if (print) { 635 IS tbz; 636 637 cum = 0; 638 for (i=0;i<nv;i++) 639 if (sfvleaves[i]) 640 vmarks[cum++] = i; 641 642 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 643 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local")); 644 PetscCall(ISView(tbz,NULL)); 645 PetscCall(ISDestroy(&tbz)); 646 } 647 648 PetscCall(PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 649 PetscCall(PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 650 PetscCall(PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 651 PetscCall(PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 652 653 /* Zero rows of lGt corresponding to identified corners 654 and interior nodal dofs */ 655 cum = 0; 656 for (i=0;i<nv;i++) { 657 if (sfvleaves[i]) { 658 vmarks[cum++] = i; 659 PetscCall(PetscBTSet(btv,i)); 660 } 661 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 662 } 663 PetscCall(PetscBTDestroy(&btvi)); 664 if (print) { 665 IS tbz; 666 667 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 668 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior")); 669 PetscCall(ISView(tbz,NULL)); 670 PetscCall(ISDestroy(&tbz)); 671 } 672 PetscCall(MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL)); 673 PetscCall(PetscFree(vmarks)); 674 PetscCall(PetscSFDestroy(&sfv)); 675 PetscCall(PetscFree2(sfvleaves,sfvroots)); 676 677 /* Recompute G */ 678 PetscCall(MatDestroy(&lG)); 679 PetscCall(MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG)); 680 if (print) { 681 PetscCall(PetscObjectSetName((PetscObject)lG,"used_lG")); 682 PetscCall(MatView(lG,NULL)); 683 PetscCall(PetscObjectSetName((PetscObject)lGt,"used_lGt")); 684 PetscCall(MatView(lGt,NULL)); 685 } 686 687 /* Get primal dofs (if any) */ 688 cum = 0; 689 for (i=0;i<ne;i++) { 690 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 691 } 692 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,marks,marks)); 693 PetscCall(ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals)); 694 if (print) { 695 PetscCall(PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs")); 696 PetscCall(ISView(primals,NULL)); 697 } 698 PetscCall(PetscBTDestroy(&bte)); 699 /* TODO: what if the user passed in some of them ? */ 700 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 701 PetscCall(ISDestroy(&primals)); 702 703 /* Compute edge connectivity */ 704 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_")); 705 706 /* Symbolic conn = lG*lGt */ 707 PetscCall(MatProductCreate(lG,lGt,NULL,&conn)); 708 PetscCall(MatProductSetType(conn,MATPRODUCT_AB)); 709 PetscCall(MatProductSetAlgorithm(conn,"default")); 710 PetscCall(MatProductSetFill(conn,PETSC_DEFAULT)); 711 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_")); 712 PetscCall(MatProductSetFromOptions(conn)); 713 PetscCall(MatProductSymbolic(conn)); 714 715 PetscCall(MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 716 if (fl2g) { 717 PetscBT btf; 718 PetscInt *iia,*jja,*iiu,*jju; 719 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 720 721 /* create CSR for all local dofs */ 722 PetscCall(PetscMalloc1(n+1,&iia)); 723 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 724 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT,pcbddc->mat_graph->nvtxs_csr,n); 725 iiu = pcbddc->mat_graph->xadj; 726 jju = pcbddc->mat_graph->adjncy; 727 } else if (pcbddc->use_local_adj) { 728 rest = PETSC_TRUE; 729 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 730 } else { 731 free = PETSC_TRUE; 732 PetscCall(PetscMalloc2(n+1,&iiu,n,&jju)); 733 iiu[0] = 0; 734 for (i=0;i<n;i++) { 735 iiu[i+1] = i+1; 736 jju[i] = -1; 737 } 738 } 739 740 /* import sizes of CSR */ 741 iia[0] = 0; 742 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 743 744 /* overwrite entries corresponding to the Nedelec field */ 745 PetscCall(PetscBTCreate(n,&btf)); 746 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 747 for (i=0;i<ne;i++) { 748 PetscCall(PetscBTSet(btf,idxs[i])); 749 iia[idxs[i]+1] = ii[i+1]-ii[i]; 750 } 751 752 /* iia in CSR */ 753 for (i=0;i<n;i++) iia[i+1] += iia[i]; 754 755 /* jja in CSR */ 756 PetscCall(PetscMalloc1(iia[n],&jja)); 757 for (i=0;i<n;i++) 758 if (!PetscBTLookup(btf,i)) 759 for (j=0;j<iiu[i+1]-iiu[i];j++) 760 jja[iia[i]+j] = jju[iiu[i]+j]; 761 762 /* map edge dofs connectivity */ 763 if (jj) { 764 PetscCall(ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj)); 765 for (i=0;i<ne;i++) { 766 PetscInt e = idxs[i]; 767 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 768 } 769 } 770 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 771 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER)); 772 if (rest) { 773 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 774 } 775 if (free) PetscCall(PetscFree2(iiu,jju)); 776 PetscCall(PetscBTDestroy(&btf)); 777 } else { 778 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER)); 779 } 780 781 /* Analyze interface for edge dofs */ 782 PetscCall(PCBDDCAnalyzeInterface(pc)); 783 pcbddc->mat_graph->twodim = PETSC_FALSE; 784 785 /* Get coarse edges in the edge space */ 786 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 787 PetscCall(MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 788 789 if (fl2g) { 790 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 791 PetscCall(PetscMalloc1(nee,&eedges)); 792 for (i=0;i<nee;i++) { 793 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 794 } 795 } else { 796 eedges = alleedges; 797 primals = allprimals; 798 } 799 800 /* Mark fine edge dofs with their coarse edge id */ 801 PetscCall(PetscArrayzero(marks,ne)); 802 PetscCall(ISGetLocalSize(primals,&cum)); 803 PetscCall(ISGetIndices(primals,&idxs)); 804 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 805 PetscCall(ISRestoreIndices(primals,&idxs)); 806 if (print) { 807 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs")); 808 PetscCall(ISView(primals,NULL)); 809 } 810 811 maxsize = 0; 812 for (i=0;i<nee;i++) { 813 PetscInt size,mark = i+1; 814 815 PetscCall(ISGetLocalSize(eedges[i],&size)); 816 PetscCall(ISGetIndices(eedges[i],&idxs)); 817 for (j=0;j<size;j++) marks[idxs[j]] = mark; 818 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 819 maxsize = PetscMax(maxsize,size); 820 } 821 822 /* Find coarse edge endpoints */ 823 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 824 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 825 for (i=0;i<nee;i++) { 826 PetscInt mark = i+1,size; 827 828 PetscCall(ISGetLocalSize(eedges[i],&size)); 829 if (!size && nedfieldlocal) continue; 830 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 831 PetscCall(ISGetIndices(eedges[i],&idxs)); 832 if (print) { 833 PetscCall(PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n",i)); 834 PetscCall(ISView(eedges[i],NULL)); 835 } 836 for (j=0;j<size;j++) { 837 PetscInt k, ee = idxs[j]; 838 if (print) PetscPrintf(PETSC_COMM_SELF," idx %" PetscInt_FMT "\n",ee); 839 for (k=ii[ee];k<ii[ee+1];k++) { 840 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %" PetscInt_FMT "\n",jj[k]); 841 if (PetscBTLookup(btv,jj[k])) { 842 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %" PetscInt_FMT "\n",jj[k]); 843 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 844 PetscInt k2; 845 PetscBool corner = PETSC_FALSE; 846 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 847 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,(int)!!PetscBTLookup(btb,jjt[k2])); 848 /* it's a corner if either is connected with an edge dof belonging to a different cc or 849 if the edge dof lie on the natural part of the boundary */ 850 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 851 corner = PETSC_TRUE; 852 break; 853 } 854 } 855 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 856 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %" PetscInt_FMT "\n",jj[k]); 857 PetscCall(PetscBTSet(btv,jj[k])); 858 } else { 859 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 860 } 861 } 862 } 863 } 864 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 865 } 866 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 867 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 868 PetscCall(PetscBTDestroy(&btb)); 869 870 /* Reset marked primal dofs */ 871 PetscCall(ISGetLocalSize(primals,&cum)); 872 PetscCall(ISGetIndices(primals,&idxs)); 873 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 874 PetscCall(ISRestoreIndices(primals,&idxs)); 875 876 /* Now use the initial lG */ 877 PetscCall(MatDestroy(&lG)); 878 PetscCall(MatDestroy(&lGt)); 879 lG = lGinit; 880 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 881 882 /* Compute extended cols indices */ 883 PetscCall(PetscBTCreate(nv,&btvc)); 884 PetscCall(PetscBTCreate(nee,&bter)); 885 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 886 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG,&i)); 887 i *= maxsize; 888 PetscCall(PetscCalloc1(nee,&extcols)); 889 PetscCall(PetscMalloc2(i,&extrow,i,&gidxs)); 890 eerr = PETSC_FALSE; 891 for (i=0;i<nee;i++) { 892 PetscInt size,found = 0; 893 894 cum = 0; 895 PetscCall(ISGetLocalSize(eedges[i],&size)); 896 if (!size && nedfieldlocal) continue; 897 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 898 PetscCall(ISGetIndices(eedges[i],&idxs)); 899 PetscCall(PetscBTMemzero(nv,btvc)); 900 for (j=0;j<size;j++) { 901 PetscInt k,ee = idxs[j]; 902 for (k=ii[ee];k<ii[ee+1];k++) { 903 PetscInt vv = jj[k]; 904 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 905 else if (!PetscBTLookupSet(btvc,vv)) found++; 906 } 907 } 908 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 909 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 910 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 911 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 912 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 913 /* it may happen that endpoints are not defined at this point 914 if it is the case, mark this edge for a second pass */ 915 if (cum != size -1 || found != 2) { 916 PetscCall(PetscBTSet(bter,i)); 917 if (print) { 918 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge")); 919 PetscCall(ISView(eedges[i],NULL)); 920 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol")); 921 PetscCall(ISView(extcols[i],NULL)); 922 } 923 eerr = PETSC_TRUE; 924 } 925 } 926 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 927 PetscCall(MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm)); 928 if (done) { 929 PetscInt *newprimals; 930 931 PetscCall(PetscMalloc1(ne,&newprimals)); 932 PetscCall(ISGetLocalSize(primals,&cum)); 933 PetscCall(ISGetIndices(primals,&idxs)); 934 PetscCall(PetscArraycpy(newprimals,idxs,cum)); 935 PetscCall(ISRestoreIndices(primals,&idxs)); 936 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 937 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %s)\n",PetscBools[eerr]); 938 for (i=0;i<nee;i++) { 939 PetscBool has_candidates = PETSC_FALSE; 940 if (PetscBTLookup(bter,i)) { 941 PetscInt size,mark = i+1; 942 943 PetscCall(ISGetLocalSize(eedges[i],&size)); 944 PetscCall(ISGetIndices(eedges[i],&idxs)); 945 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 946 for (j=0;j<size;j++) { 947 PetscInt k,ee = idxs[j]; 948 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n",ee,ii[ee],ii[ee+1]); 949 for (k=ii[ee];k<ii[ee+1];k++) { 950 /* set all candidates located on the edge as corners */ 951 if (PetscBTLookup(btvcand,jj[k])) { 952 PetscInt k2,vv = jj[k]; 953 has_candidates = PETSC_TRUE; 954 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %" PetscInt_FMT "\n",vv); 955 PetscCall(PetscBTSet(btv,vv)); 956 /* set all edge dofs connected to candidate as primals */ 957 for (k2=iit[vv];k2<iit[vv+1];k2++) { 958 if (marks[jjt[k2]] == mark) { 959 PetscInt k3,ee2 = jjt[k2]; 960 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %" PetscInt_FMT "\n",ee2); 961 newprimals[cum++] = ee2; 962 /* finally set the new corners */ 963 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 964 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %" PetscInt_FMT "\n",jj[k3]); 965 PetscCall(PetscBTSet(btv,jj[k3])); 966 } 967 } 968 } 969 } else { 970 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %" PetscInt_FMT "\n",jj[k]); 971 } 972 } 973 } 974 if (!has_candidates) { /* circular edge */ 975 PetscInt k, ee = idxs[0],*tmarks; 976 977 PetscCall(PetscCalloc1(ne,&tmarks)); 978 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %" PetscInt_FMT "\n",i); 979 for (k=ii[ee];k<ii[ee+1];k++) { 980 PetscInt k2; 981 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %" PetscInt_FMT "\n",jj[k]); 982 PetscCall(PetscBTSet(btv,jj[k])); 983 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 984 } 985 for (j=0;j<size;j++) { 986 if (tmarks[idxs[j]] > 1) { 987 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %" PetscInt_FMT "\n",idxs[j]); 988 newprimals[cum++] = idxs[j]; 989 } 990 } 991 PetscCall(PetscFree(tmarks)); 992 } 993 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 994 } 995 PetscCall(ISDestroy(&extcols[i])); 996 } 997 PetscCall(PetscFree(extcols)); 998 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 999 PetscCall(PetscSortRemoveDupsInt(&cum,newprimals)); 1000 if (fl2g) { 1001 PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals)); 1002 PetscCall(ISDestroy(&primals)); 1003 for (i=0;i<nee;i++) { 1004 PetscCall(ISDestroy(&eedges[i])); 1005 } 1006 PetscCall(PetscFree(eedges)); 1007 } 1008 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1009 PetscCall(ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals)); 1010 PetscCall(PetscFree(newprimals)); 1011 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 1012 PetscCall(ISDestroy(&primals)); 1013 PetscCall(PCBDDCAnalyzeInterface(pc)); 1014 pcbddc->mat_graph->twodim = PETSC_FALSE; 1015 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1016 if (fl2g) { 1017 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 1018 PetscCall(PetscMalloc1(nee,&eedges)); 1019 for (i=0;i<nee;i++) { 1020 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 1021 } 1022 } else { 1023 eedges = alleedges; 1024 primals = allprimals; 1025 } 1026 PetscCall(PetscCalloc1(nee,&extcols)); 1027 1028 /* Mark again */ 1029 PetscCall(PetscArrayzero(marks,ne)); 1030 for (i=0;i<nee;i++) { 1031 PetscInt size,mark = i+1; 1032 1033 PetscCall(ISGetLocalSize(eedges[i],&size)); 1034 PetscCall(ISGetIndices(eedges[i],&idxs)); 1035 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1036 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1037 } 1038 if (print) { 1039 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass")); 1040 PetscCall(ISView(primals,NULL)); 1041 } 1042 1043 /* Recompute extended cols */ 1044 eerr = PETSC_FALSE; 1045 for (i=0;i<nee;i++) { 1046 PetscInt size; 1047 1048 cum = 0; 1049 PetscCall(ISGetLocalSize(eedges[i],&size)); 1050 if (!size && nedfieldlocal) continue; 1051 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 1052 PetscCall(ISGetIndices(eedges[i],&idxs)); 1053 for (j=0;j<size;j++) { 1054 PetscInt k,ee = idxs[j]; 1055 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1056 } 1057 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1058 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 1059 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 1060 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 1061 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 1062 if (cum != size -1) { 1063 if (print) { 1064 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass")); 1065 PetscCall(ISView(eedges[i],NULL)); 1066 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass")); 1067 PetscCall(ISView(extcols[i],NULL)); 1068 } 1069 eerr = PETSC_TRUE; 1070 } 1071 } 1072 } 1073 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1074 PetscCall(PetscFree2(extrow,gidxs)); 1075 PetscCall(PetscBTDestroy(&bter)); 1076 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF)); 1077 /* an error should not occur at this point */ 1078 PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1079 1080 /* Check the number of endpoints */ 1081 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1082 PetscCall(PetscMalloc1(2*nee,&corners)); 1083 PetscCall(PetscMalloc1(nee,&cedges)); 1084 for (i=0;i<nee;i++) { 1085 PetscInt size, found = 0, gc[2]; 1086 1087 /* init with defaults */ 1088 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1089 PetscCall(ISGetLocalSize(eedges[i],&size)); 1090 if (!size && nedfieldlocal) continue; 1091 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 1092 PetscCall(ISGetIndices(eedges[i],&idxs)); 1093 PetscCall(PetscBTMemzero(nv,btvc)); 1094 for (j=0;j<size;j++) { 1095 PetscInt k,ee = idxs[j]; 1096 for (k=ii[ee];k<ii[ee+1];k++) { 1097 PetscInt vv = jj[k]; 1098 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1099 PetscCheck(found != 2,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %" PetscInt_FMT,i); 1100 corners[i*2+found++] = vv; 1101 } 1102 } 1103 } 1104 if (found != 2) { 1105 PetscInt e; 1106 if (fl2g) { 1107 PetscCall(ISLocalToGlobalMappingApply(fl2g,1,idxs,&e)); 1108 } else { 1109 e = idxs[0]; 1110 } 1111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")",found,i,e,idxs[0]); 1112 } 1113 1114 /* get primal dof index on this coarse edge */ 1115 PetscCall(ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc)); 1116 if (gc[0] > gc[1]) { 1117 PetscInt swap = corners[2*i]; 1118 corners[2*i] = corners[2*i+1]; 1119 corners[2*i+1] = swap; 1120 } 1121 cedges[i] = idxs[size-1]; 1122 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1123 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1124 } 1125 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1126 PetscCall(PetscBTDestroy(&btvc)); 1127 1128 if (PetscDefined(USE_DEBUG)) { 1129 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1130 not interfere with neighbouring coarse edges */ 1131 PetscCall(PetscMalloc1(nee+1,&emarks)); 1132 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1133 for (i=0;i<nv;i++) { 1134 PetscInt emax = 0,eemax = 0; 1135 1136 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1137 PetscCall(PetscArrayzero(emarks,nee+1)); 1138 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1139 for (j=1;j<nee+1;j++) { 1140 if (emax < emarks[j]) { 1141 emax = emarks[j]; 1142 eemax = j; 1143 } 1144 } 1145 /* not relevant for edges */ 1146 if (!eemax) continue; 1147 1148 for (j=ii[i];j<ii[i+1];j++) { 1149 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1150 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT,marks[jj[j]]-1,eemax,i,jj[j]); 1151 } 1152 } 1153 } 1154 PetscCall(PetscFree(emarks)); 1155 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1156 } 1157 1158 /* Compute extended rows indices for edge blocks of the change of basis */ 1159 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1160 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt,&extmem)); 1161 extmem *= maxsize; 1162 PetscCall(PetscMalloc1(extmem*nee,&extrow)); 1163 PetscCall(PetscMalloc1(nee,&extrows)); 1164 PetscCall(PetscCalloc1(nee,&extrowcum)); 1165 for (i=0;i<nv;i++) { 1166 PetscInt mark = 0,size,start; 1167 1168 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1169 for (j=ii[i];j<ii[i+1];j++) 1170 if (marks[jj[j]] && !mark) 1171 mark = marks[jj[j]]; 1172 1173 /* not relevant */ 1174 if (!mark) continue; 1175 1176 /* import extended row */ 1177 mark--; 1178 start = mark*extmem+extrowcum[mark]; 1179 size = ii[i+1]-ii[i]; 1180 PetscCheck(extrowcum[mark] + size <= extmem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT,extrowcum[mark] + size,extmem); 1181 PetscCall(PetscArraycpy(extrow+start,jj+ii[i],size)); 1182 extrowcum[mark] += size; 1183 } 1184 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1185 PetscCall(MatDestroy(&lGt)); 1186 PetscCall(PetscFree(marks)); 1187 1188 /* Compress extrows */ 1189 cum = 0; 1190 for (i=0;i<nee;i++) { 1191 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1192 PetscCall(PetscSortRemoveDupsInt(&size,start)); 1193 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i])); 1194 cum = PetscMax(cum,size); 1195 } 1196 PetscCall(PetscFree(extrowcum)); 1197 PetscCall(PetscBTDestroy(&btv)); 1198 PetscCall(PetscBTDestroy(&btvcand)); 1199 1200 /* Workspace for lapack inner calls and VecSetValues */ 1201 PetscCall(PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork)); 1202 1203 /* Create change of basis matrix (preallocation can be improved) */ 1204 PetscCall(MatCreate(comm,&T)); 1205 PetscCall(MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,pc->pmat->rmap->N,pc->pmat->rmap->N)); 1206 PetscCall(MatSetType(T,MATAIJ)); 1207 PetscCall(MatSeqAIJSetPreallocation(T,10,NULL)); 1208 PetscCall(MatMPIAIJSetPreallocation(T,10,NULL,10,NULL)); 1209 PetscCall(MatSetLocalToGlobalMapping(T,al2g,al2g)); 1210 PetscCall(MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 1211 PetscCall(MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE)); 1212 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1213 1214 /* Defaults to identity */ 1215 PetscCall(MatCreateVecs(pc->pmat,&tvec,NULL)); 1216 PetscCall(VecSet(tvec,1.0)); 1217 PetscCall(MatDiagonalSet(T,tvec,INSERT_VALUES)); 1218 PetscCall(VecDestroy(&tvec)); 1219 1220 /* Create discrete gradient for the coarser level if needed */ 1221 PetscCall(MatDestroy(&pcbddc->nedcG)); 1222 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1223 if (pcbddc->current_level < pcbddc->max_levels) { 1224 ISLocalToGlobalMapping cel2g,cvl2g; 1225 IS wis,gwis; 1226 PetscInt cnv,cne; 1227 1228 PetscCall(ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis)); 1229 if (fl2g) { 1230 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal)); 1231 } else { 1232 PetscCall(PetscObjectReference((PetscObject)wis)); 1233 pcbddc->nedclocal = wis; 1234 } 1235 PetscCall(ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis)); 1236 PetscCall(ISDestroy(&wis)); 1237 PetscCall(ISRenumber(gwis,NULL,&cne,&wis)); 1238 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cel2g)); 1239 PetscCall(ISDestroy(&wis)); 1240 PetscCall(ISDestroy(&gwis)); 1241 1242 PetscCall(ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis)); 1243 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis)); 1244 PetscCall(ISDestroy(&wis)); 1245 PetscCall(ISRenumber(gwis,NULL,&cnv,&wis)); 1246 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cvl2g)); 1247 PetscCall(ISDestroy(&wis)); 1248 PetscCall(ISDestroy(&gwis)); 1249 1250 PetscCall(MatCreate(comm,&pcbddc->nedcG)); 1251 PetscCall(MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv)); 1252 PetscCall(MatSetType(pcbddc->nedcG,MATAIJ)); 1253 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL)); 1254 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL)); 1255 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g)); 1256 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1257 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1258 } 1259 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1260 1261 #if defined(PRINT_GDET) 1262 inc = 0; 1263 lev = pcbddc->current_level; 1264 #endif 1265 1266 /* Insert values in the change of basis matrix */ 1267 for (i=0;i<nee;i++) { 1268 Mat Gins = NULL, GKins = NULL; 1269 IS cornersis = NULL; 1270 PetscScalar cvals[2]; 1271 1272 if (pcbddc->nedcG) { 1273 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis)); 1274 } 1275 PetscCall(PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork)); 1276 if (Gins && GKins) { 1277 const PetscScalar *data; 1278 const PetscInt *rows,*cols; 1279 PetscInt nrh,nch,nrc,ncc; 1280 1281 PetscCall(ISGetIndices(eedges[i],&cols)); 1282 /* H1 */ 1283 PetscCall(ISGetIndices(extrows[i],&rows)); 1284 PetscCall(MatGetSize(Gins,&nrh,&nch)); 1285 PetscCall(MatDenseGetArrayRead(Gins,&data)); 1286 PetscCall(MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES)); 1287 PetscCall(MatDenseRestoreArrayRead(Gins,&data)); 1288 PetscCall(ISRestoreIndices(extrows[i],&rows)); 1289 /* complement */ 1290 PetscCall(MatGetSize(GKins,&nrc,&ncc)); 1291 PetscCheck(ncc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %" PetscInt_FMT,i); 1292 PetscCheck(ncc + nch == nrc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT,ncc,nch,nrc,i); 1293 PetscCheck(ncc == 1 || !pcbddc->nedcG,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT,i,ncc); 1294 PetscCall(MatDenseGetArrayRead(GKins,&data)); 1295 PetscCall(MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES)); 1296 PetscCall(MatDenseRestoreArrayRead(GKins,&data)); 1297 1298 /* coarse discrete gradient */ 1299 if (pcbddc->nedcG) { 1300 PetscInt cols[2]; 1301 1302 cols[0] = 2*i; 1303 cols[1] = 2*i+1; 1304 PetscCall(MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES)); 1305 } 1306 PetscCall(ISRestoreIndices(eedges[i],&cols)); 1307 } 1308 PetscCall(ISDestroy(&extrows[i])); 1309 PetscCall(ISDestroy(&extcols[i])); 1310 PetscCall(ISDestroy(&cornersis)); 1311 PetscCall(MatDestroy(&Gins)); 1312 PetscCall(MatDestroy(&GKins)); 1313 } 1314 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1315 1316 /* Start assembling */ 1317 PetscCall(MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY)); 1318 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1319 1320 /* Free */ 1321 if (fl2g) { 1322 PetscCall(ISDestroy(&primals)); 1323 for (i=0;i<nee;i++) { 1324 PetscCall(ISDestroy(&eedges[i])); 1325 } 1326 PetscCall(PetscFree(eedges)); 1327 } 1328 1329 /* hack mat_graph with primal dofs on the coarse edges */ 1330 { 1331 PCBDDCGraph graph = pcbddc->mat_graph; 1332 PetscInt *oqueue = graph->queue; 1333 PetscInt *ocptr = graph->cptr; 1334 PetscInt ncc,*idxs; 1335 1336 /* find first primal edge */ 1337 if (pcbddc->nedclocal) { 1338 PetscCall(ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1339 } else { 1340 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges)); 1341 idxs = cedges; 1342 } 1343 cum = 0; 1344 while (cum < nee && cedges[cum] < 0) cum++; 1345 1346 /* adapt connected components */ 1347 PetscCall(PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue)); 1348 graph->cptr[0] = 0; 1349 for (i=0,ncc=0;i<graph->ncc;i++) { 1350 PetscInt lc = ocptr[i+1]-ocptr[i]; 1351 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1352 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1353 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1354 ncc++; 1355 lc--; 1356 cum++; 1357 while (cum < nee && cedges[cum] < 0) cum++; 1358 } 1359 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1360 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1361 ncc++; 1362 } 1363 graph->ncc = ncc; 1364 if (pcbddc->nedclocal) { 1365 PetscCall(ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1366 } 1367 PetscCall(PetscFree2(ocptr,oqueue)); 1368 } 1369 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1370 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1371 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1372 PetscCall(MatDestroy(&conn)); 1373 1374 PetscCall(ISDestroy(&nedfieldlocal)); 1375 PetscCall(PetscFree(extrow)); 1376 PetscCall(PetscFree2(work,rwork)); 1377 PetscCall(PetscFree(corners)); 1378 PetscCall(PetscFree(cedges)); 1379 PetscCall(PetscFree(extrows)); 1380 PetscCall(PetscFree(extcols)); 1381 PetscCall(MatDestroy(&lG)); 1382 1383 /* Complete assembling */ 1384 PetscCall(MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY)); 1385 if (pcbddc->nedcG) { 1386 PetscCall(MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1387 #if 0 1388 PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1389 PetscCall(MatView(pcbddc->nedcG,NULL)); 1390 #endif 1391 } 1392 1393 /* set change of basis */ 1394 PetscCall(PCBDDCSetChangeOfBasisMat(pc,T,singular)); 1395 PetscCall(MatDestroy(&T)); 1396 1397 PetscFunctionReturn(0); 1398 } 1399 1400 /* the near-null space of BDDC carries information on quadrature weights, 1401 and these can be collinear -> so cheat with MatNullSpaceCreate 1402 and create a suitable set of basis vectors first */ 1403 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1404 { 1405 PetscInt i; 1406 1407 PetscFunctionBegin; 1408 for (i=0;i<nvecs;i++) { 1409 PetscInt first,last; 1410 1411 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1412 PetscCheck(last-first >= 2*nvecs || !has_const,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1413 if (i>=first && i < last) { 1414 PetscScalar *data; 1415 PetscCall(VecGetArray(quad_vecs[i],&data)); 1416 if (!has_const) { 1417 data[i-first] = 1.; 1418 } else { 1419 data[2*i-first] = 1./PetscSqrtReal(2.); 1420 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1421 } 1422 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1423 } 1424 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1425 } 1426 PetscCall(MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp)); 1427 for (i=0;i<nvecs;i++) { /* reset vectors */ 1428 PetscInt first,last; 1429 PetscCall(VecLockReadPop(quad_vecs[i])); 1430 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1431 if (i>=first && i < last) { 1432 PetscScalar *data; 1433 PetscCall(VecGetArray(quad_vecs[i],&data)); 1434 if (!has_const) { 1435 data[i-first] = 0.; 1436 } else { 1437 data[2*i-first] = 0.; 1438 data[2*i-first+1] = 0.; 1439 } 1440 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1441 } 1442 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1443 PetscCall(VecLockReadPush(quad_vecs[i])); 1444 } 1445 PetscFunctionReturn(0); 1446 } 1447 1448 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1449 { 1450 Mat loc_divudotp; 1451 Vec p,v,vins,quad_vec,*quad_vecs; 1452 ISLocalToGlobalMapping map; 1453 PetscScalar *vals; 1454 const PetscScalar *array; 1455 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1456 PetscInt n_neigh,*neigh,*n_shared,**shared; 1457 PetscMPIInt rank; 1458 1459 PetscFunctionBegin; 1460 PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1461 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1462 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A))); 1463 if (!maxneighs) { 1464 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1465 *nnsp = NULL; 1466 PetscFunctionReturn(0); 1467 } 1468 maxsize = 0; 1469 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1470 PetscCall(PetscMalloc2(maxsize,&gidxs,maxsize,&vals)); 1471 /* create vectors to hold quadrature weights */ 1472 PetscCall(MatCreateVecs(A,&quad_vec,NULL)); 1473 if (!transpose) { 1474 PetscCall(MatISGetLocalToGlobalMapping(A,&map,NULL)); 1475 } else { 1476 PetscCall(MatISGetLocalToGlobalMapping(A,NULL,&map)); 1477 } 1478 PetscCall(VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs)); 1479 PetscCall(VecDestroy(&quad_vec)); 1480 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp)); 1481 for (i=0;i<maxneighs;i++) { 1482 PetscCall(VecLockReadPop(quad_vecs[i])); 1483 } 1484 1485 /* compute local quad vec */ 1486 PetscCall(MatISGetLocalMat(divudotp,&loc_divudotp)); 1487 if (!transpose) { 1488 PetscCall(MatCreateVecs(loc_divudotp,&v,&p)); 1489 } else { 1490 PetscCall(MatCreateVecs(loc_divudotp,&p,&v)); 1491 } 1492 PetscCall(VecSet(p,1.)); 1493 if (!transpose) { 1494 PetscCall(MatMultTranspose(loc_divudotp,p,v)); 1495 } else { 1496 PetscCall(MatMult(loc_divudotp,p,v)); 1497 } 1498 if (vl2l) { 1499 Mat lA; 1500 VecScatter sc; 1501 1502 PetscCall(MatISGetLocalMat(A,&lA)); 1503 PetscCall(MatCreateVecs(lA,&vins,NULL)); 1504 PetscCall(VecScatterCreate(v,NULL,vins,vl2l,&sc)); 1505 PetscCall(VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1506 PetscCall(VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1507 PetscCall(VecScatterDestroy(&sc)); 1508 } else { 1509 vins = v; 1510 } 1511 PetscCall(VecGetArrayRead(vins,&array)); 1512 PetscCall(VecDestroy(&p)); 1513 1514 /* insert in global quadrature vecs */ 1515 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 1516 for (i=1;i<n_neigh;i++) { 1517 const PetscInt *idxs; 1518 PetscInt idx,nn,j; 1519 1520 idxs = shared[i]; 1521 nn = n_shared[i]; 1522 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1523 PetscCall(PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx)); 1524 idx = -(idx+1); 1525 PetscCheck(idx >= 0 && idx < maxneighs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")",idx,maxneighs); 1526 PetscCall(ISLocalToGlobalMappingApply(map,nn,idxs,gidxs)); 1527 PetscCall(VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES)); 1528 } 1529 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1530 PetscCall(VecRestoreArrayRead(vins,&array)); 1531 if (vl2l) { 1532 PetscCall(VecDestroy(&vins)); 1533 } 1534 PetscCall(VecDestroy(&v)); 1535 PetscCall(PetscFree2(gidxs,vals)); 1536 1537 /* assemble near null space */ 1538 for (i=0;i<maxneighs;i++) { 1539 PetscCall(VecAssemblyBegin(quad_vecs[i])); 1540 } 1541 for (i=0;i<maxneighs;i++) { 1542 PetscCall(VecAssemblyEnd(quad_vecs[i])); 1543 PetscCall(VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view")); 1544 PetscCall(VecLockReadPush(quad_vecs[i])); 1545 } 1546 PetscCall(VecDestroyVecs(maxneighs,&quad_vecs)); 1547 PetscFunctionReturn(0); 1548 } 1549 1550 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1551 { 1552 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1553 1554 PetscFunctionBegin; 1555 if (primalv) { 1556 if (pcbddc->user_primal_vertices_local) { 1557 IS list[2], newp; 1558 1559 list[0] = primalv; 1560 list[1] = pcbddc->user_primal_vertices_local; 1561 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp)); 1562 PetscCall(ISSortRemoveDups(newp)); 1563 PetscCall(ISDestroy(&list[1])); 1564 pcbddc->user_primal_vertices_local = newp; 1565 } else { 1566 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primalv)); 1567 } 1568 } 1569 PetscFunctionReturn(0); 1570 } 1571 1572 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1573 { 1574 PetscInt f, *comp = (PetscInt *)ctx; 1575 1576 PetscFunctionBegin; 1577 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1578 PetscFunctionReturn(0); 1579 } 1580 1581 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1582 { 1583 Vec local,global; 1584 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1585 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1586 PetscBool monolithic = PETSC_FALSE; 1587 1588 PetscFunctionBegin; 1589 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC"); 1590 PetscCall(PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL)); 1591 PetscOptionsEnd(); 1592 /* need to convert from global to local topology information and remove references to information in global ordering */ 1593 PetscCall(MatCreateVecs(pc->pmat,&global,NULL)); 1594 PetscCall(MatCreateVecs(matis->A,&local,NULL)); 1595 PetscCall(VecBindToCPU(global,PETSC_TRUE)); 1596 PetscCall(VecBindToCPU(local,PETSC_TRUE)); 1597 if (monolithic) { /* just get block size to properly compute vertices */ 1598 if (pcbddc->vertex_size == 1) { 1599 PetscCall(MatGetBlockSize(pc->pmat,&pcbddc->vertex_size)); 1600 } 1601 goto boundary; 1602 } 1603 1604 if (pcbddc->user_provided_isfordofs) { 1605 if (pcbddc->n_ISForDofs) { 1606 PetscInt i; 1607 1608 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal)); 1609 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1610 PetscInt bs; 1611 1612 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i])); 1613 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i],&bs)); 1614 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1615 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1616 } 1617 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1618 pcbddc->n_ISForDofs = 0; 1619 PetscCall(PetscFree(pcbddc->ISForDofs)); 1620 } 1621 } else { 1622 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1623 DM dm; 1624 1625 PetscCall(MatGetDM(pc->pmat, &dm)); 1626 if (!dm) { 1627 PetscCall(PCGetDM(pc, &dm)); 1628 } 1629 if (dm) { 1630 IS *fields; 1631 PetscInt nf,i; 1632 1633 PetscCall(DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL)); 1634 PetscCall(PetscMalloc1(nf,&pcbddc->ISForDofsLocal)); 1635 for (i=0;i<nf;i++) { 1636 PetscInt bs; 1637 1638 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i])); 1639 PetscCall(ISGetBlockSize(fields[i],&bs)); 1640 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1641 PetscCall(ISDestroy(&fields[i])); 1642 } 1643 PetscCall(PetscFree(fields)); 1644 pcbddc->n_ISForDofsLocal = nf; 1645 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1646 PetscContainer c; 1647 1648 PetscCall(PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c)); 1649 if (c) { 1650 MatISLocalFields lf; 1651 PetscCall(PetscContainerGetPointer(c,(void**)&lf)); 1652 PetscCall(PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf)); 1653 } else { /* fallback, create the default fields if bs > 1 */ 1654 PetscInt i, n = matis->A->rmap->n; 1655 PetscCall(MatGetBlockSize(pc->pmat,&i)); 1656 if (i > 1) { 1657 pcbddc->n_ISForDofsLocal = i; 1658 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal)); 1659 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1660 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i])); 1661 } 1662 } 1663 } 1664 } 1665 } else { 1666 PetscInt i; 1667 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1668 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i])); 1669 } 1670 } 1671 } 1672 1673 boundary: 1674 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1675 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal)); 1676 } else if (pcbddc->DirichletBoundariesLocal) { 1677 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal)); 1678 } 1679 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1680 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal)); 1681 } else if (pcbddc->NeumannBoundariesLocal) { 1682 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal)); 1683 } 1684 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1685 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local)); 1686 } 1687 PetscCall(VecDestroy(&global)); 1688 PetscCall(VecDestroy(&local)); 1689 /* detect local disconnected subdomains if requested (use matis->A) */ 1690 if (pcbddc->detect_disconnected) { 1691 IS primalv = NULL; 1692 PetscInt i; 1693 PetscBool filter = pcbddc->detect_disconnected_filter; 1694 1695 for (i=0;i<pcbddc->n_local_subs;i++) { 1696 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1697 } 1698 PetscCall(PetscFree(pcbddc->local_subs)); 1699 PetscCall(PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv)); 1700 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,primalv)); 1701 PetscCall(ISDestroy(&primalv)); 1702 } 1703 /* early stage corner detection */ 1704 { 1705 DM dm; 1706 1707 PetscCall(MatGetDM(pc->pmat,&dm)); 1708 if (!dm) { 1709 PetscCall(PCGetDM(pc,&dm)); 1710 } 1711 if (dm) { 1712 PetscBool isda; 1713 1714 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda)); 1715 if (isda) { 1716 ISLocalToGlobalMapping l2l; 1717 IS corners; 1718 Mat lA; 1719 PetscBool gl,lo; 1720 1721 { 1722 Vec cvec; 1723 const PetscScalar *coords; 1724 PetscInt dof,n,cdim; 1725 PetscBool memc = PETSC_TRUE; 1726 1727 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1728 PetscCall(DMGetCoordinates(dm,&cvec)); 1729 PetscCall(VecGetLocalSize(cvec,&n)); 1730 PetscCall(VecGetBlockSize(cvec,&cdim)); 1731 n /= cdim; 1732 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1733 PetscCall(PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords)); 1734 PetscCall(VecGetArrayRead(cvec,&coords)); 1735 #if defined(PETSC_USE_COMPLEX) 1736 memc = PETSC_FALSE; 1737 #endif 1738 if (dof != 1) memc = PETSC_FALSE; 1739 if (memc) { 1740 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof)); 1741 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1742 PetscReal *bcoords = pcbddc->mat_graph->coords; 1743 PetscInt i, b, d; 1744 1745 for (i=0;i<n;i++) { 1746 for (b=0;b<dof;b++) { 1747 for (d=0;d<cdim;d++) { 1748 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1749 } 1750 } 1751 } 1752 } 1753 PetscCall(VecRestoreArrayRead(cvec,&coords)); 1754 pcbddc->mat_graph->cdim = cdim; 1755 pcbddc->mat_graph->cnloc = dof*n; 1756 pcbddc->mat_graph->cloc = PETSC_FALSE; 1757 } 1758 PetscCall(DMDAGetSubdomainCornersIS(dm,&corners)); 1759 PetscCall(MatISGetLocalMat(pc->pmat,&lA)); 1760 PetscCall(MatGetLocalToGlobalMapping(lA,&l2l,NULL)); 1761 PetscCall(MatISRestoreLocalMat(pc->pmat,&lA)); 1762 lo = (PetscBool)(l2l && corners); 1763 PetscCall(MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 1764 if (gl) { /* From PETSc's DMDA */ 1765 const PetscInt *idx; 1766 PetscInt dof,bs,*idxout,n; 1767 1768 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1769 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l,&bs)); 1770 PetscCall(ISGetLocalSize(corners,&n)); 1771 PetscCall(ISGetIndices(corners,&idx)); 1772 if (bs == dof) { 1773 PetscCall(PetscMalloc1(n,&idxout)); 1774 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout)); 1775 } else { /* the original DMDA local-to-local map have been modified */ 1776 PetscInt i,d; 1777 1778 PetscCall(PetscMalloc1(dof*n,&idxout)); 1779 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1780 PetscCall(ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout)); 1781 1782 bs = 1; 1783 n *= dof; 1784 } 1785 PetscCall(ISRestoreIndices(corners,&idx)); 1786 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1787 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners)); 1788 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,corners)); 1789 PetscCall(ISDestroy(&corners)); 1790 pcbddc->corner_selected = PETSC_TRUE; 1791 pcbddc->corner_selection = PETSC_TRUE; 1792 } 1793 if (corners) { 1794 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1795 } 1796 } 1797 } 1798 } 1799 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1800 DM dm; 1801 1802 PetscCall(MatGetDM(pc->pmat,&dm)); 1803 if (!dm) { 1804 PetscCall(PCGetDM(pc,&dm)); 1805 } 1806 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1807 Vec vcoords; 1808 PetscSection section; 1809 PetscReal *coords; 1810 PetscInt d,cdim,nl,nf,**ctxs; 1811 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1812 /* debug coordinates */ 1813 PetscViewer viewer; 1814 PetscBool flg; 1815 PetscViewerFormat format; 1816 const char *prefix; 1817 1818 PetscCall(DMGetCoordinateDim(dm,&cdim)); 1819 PetscCall(DMGetLocalSection(dm,§ion)); 1820 PetscCall(PetscSectionGetNumFields(section,&nf)); 1821 PetscCall(DMCreateGlobalVector(dm,&vcoords)); 1822 PetscCall(VecGetLocalSize(vcoords,&nl)); 1823 PetscCall(PetscMalloc1(nl*cdim,&coords)); 1824 PetscCall(PetscMalloc2(nf,&funcs,nf,&ctxs)); 1825 PetscCall(PetscMalloc1(nf,&ctxs[0])); 1826 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1827 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1828 1829 /* debug coordinates */ 1830 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 1831 PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords),((PetscObject)vcoords)->options,prefix,"-pc_bddc_coords_vec_view",&viewer,&format,&flg)); 1832 if (flg) PetscCall(PetscViewerPushFormat(viewer,format)); 1833 for (d=0;d<cdim;d++) { 1834 PetscInt i; 1835 const PetscScalar *v; 1836 char name[16]; 1837 1838 for (i=0;i<nf;i++) ctxs[i][0] = d; 1839 PetscCall(PetscSNPrintf(name,sizeof(name),"bddc_coords_%d",(int)d)); 1840 PetscCall(PetscObjectSetName((PetscObject)vcoords,name)); 1841 PetscCall(DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords)); 1842 if (flg) PetscCall(VecView(vcoords,viewer)); 1843 PetscCall(VecGetArrayRead(vcoords,&v)); 1844 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1845 PetscCall(VecRestoreArrayRead(vcoords,&v)); 1846 } 1847 PetscCall(VecDestroy(&vcoords)); 1848 PetscCall(PCSetCoordinates(pc,cdim,nl,coords)); 1849 PetscCall(PetscFree(coords)); 1850 PetscCall(PetscFree(ctxs[0])); 1851 PetscCall(PetscFree2(funcs,ctxs)); 1852 if (flg) { 1853 PetscCall(PetscViewerPopFormat(viewer)); 1854 PetscCall(PetscViewerDestroy(&viewer)); 1855 } 1856 } 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1862 { 1863 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1864 IS nis; 1865 const PetscInt *idxs; 1866 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1867 1868 PetscFunctionBegin; 1869 PetscCheck(mop == MPI_LAND || mop == MPI_LOR,PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1870 if (mop == MPI_LAND) { 1871 /* init rootdata with true */ 1872 for (i=0;i<pc->pmat->rmap->n;i++) matis->sf_rootdata[i] = 1; 1873 } else { 1874 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 1875 } 1876 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 1877 PetscCall(ISGetLocalSize(*is,&nd)); 1878 PetscCall(ISGetIndices(*is,&idxs)); 1879 for (i=0;i<nd;i++) 1880 if (-1 < idxs[i] && idxs[i] < n) 1881 matis->sf_leafdata[idxs[i]] = 1; 1882 PetscCall(ISRestoreIndices(*is,&idxs)); 1883 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1884 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1885 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1886 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1887 if (mop == MPI_LAND) { 1888 PetscCall(PetscMalloc1(nd,&nidxs)); 1889 } else { 1890 PetscCall(PetscMalloc1(n,&nidxs)); 1891 } 1892 for (i=0,nnd=0;i<n;i++) 1893 if (matis->sf_leafdata[i]) 1894 nidxs[nnd++] = i; 1895 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis)); 1896 PetscCall(ISDestroy(is)); 1897 *is = nis; 1898 PetscFunctionReturn(0); 1899 } 1900 1901 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1902 { 1903 PC_IS *pcis = (PC_IS*)(pc->data); 1904 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1905 1906 PetscFunctionBegin; 1907 if (!pcbddc->benign_have_null) { 1908 PetscFunctionReturn(0); 1909 } 1910 if (pcbddc->ChangeOfBasisMatrix) { 1911 Vec swap; 1912 1913 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change)); 1914 swap = pcbddc->work_change; 1915 pcbddc->work_change = r; 1916 r = swap; 1917 } 1918 PetscCall(VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1919 PetscCall(VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1920 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1921 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D)); 1922 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1923 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 1924 PetscCall(VecSet(z,0.)); 1925 PetscCall(VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1926 PetscCall(VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1927 if (pcbddc->ChangeOfBasisMatrix) { 1928 pcbddc->work_change = r; 1929 PetscCall(VecCopy(z,pcbddc->work_change)); 1930 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z)); 1931 } 1932 PetscFunctionReturn(0); 1933 } 1934 1935 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1936 { 1937 PCBDDCBenignMatMult_ctx ctx; 1938 PetscBool apply_right,apply_left,reset_x; 1939 1940 PetscFunctionBegin; 1941 PetscCall(MatShellGetContext(A,&ctx)); 1942 if (transpose) { 1943 apply_right = ctx->apply_left; 1944 apply_left = ctx->apply_right; 1945 } else { 1946 apply_right = ctx->apply_right; 1947 apply_left = ctx->apply_left; 1948 } 1949 reset_x = PETSC_FALSE; 1950 if (apply_right) { 1951 const PetscScalar *ax; 1952 PetscInt nl,i; 1953 1954 PetscCall(VecGetLocalSize(x,&nl)); 1955 PetscCall(VecGetArrayRead(x,&ax)); 1956 PetscCall(PetscArraycpy(ctx->work,ax,nl)); 1957 PetscCall(VecRestoreArrayRead(x,&ax)); 1958 for (i=0;i<ctx->benign_n;i++) { 1959 PetscScalar sum,val; 1960 const PetscInt *idxs; 1961 PetscInt nz,j; 1962 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 1963 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1964 sum = 0.; 1965 if (ctx->apply_p0) { 1966 val = ctx->work[idxs[nz-1]]; 1967 for (j=0;j<nz-1;j++) { 1968 sum += ctx->work[idxs[j]]; 1969 ctx->work[idxs[j]] += val; 1970 } 1971 } else { 1972 for (j=0;j<nz-1;j++) { 1973 sum += ctx->work[idxs[j]]; 1974 } 1975 } 1976 ctx->work[idxs[nz-1]] -= sum; 1977 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1978 } 1979 PetscCall(VecPlaceArray(x,ctx->work)); 1980 reset_x = PETSC_TRUE; 1981 } 1982 if (transpose) { 1983 PetscCall(MatMultTranspose(ctx->A,x,y)); 1984 } else { 1985 PetscCall(MatMult(ctx->A,x,y)); 1986 } 1987 if (reset_x) PetscCall(VecResetArray(x)); 1988 if (apply_left) { 1989 PetscScalar *ay; 1990 PetscInt i; 1991 1992 PetscCall(VecGetArray(y,&ay)); 1993 for (i=0;i<ctx->benign_n;i++) { 1994 PetscScalar sum,val; 1995 const PetscInt *idxs; 1996 PetscInt nz,j; 1997 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 1998 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1999 val = -ay[idxs[nz-1]]; 2000 if (ctx->apply_p0) { 2001 sum = 0.; 2002 for (j=0;j<nz-1;j++) { 2003 sum += ay[idxs[j]]; 2004 ay[idxs[j]] += val; 2005 } 2006 ay[idxs[nz-1]] += sum; 2007 } else { 2008 for (j=0;j<nz-1;j++) { 2009 ay[idxs[j]] += val; 2010 } 2011 ay[idxs[nz-1]] = 0.; 2012 } 2013 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 2014 } 2015 PetscCall(VecRestoreArray(y,&ay)); 2016 } 2017 PetscFunctionReturn(0); 2018 } 2019 2020 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2021 { 2022 PetscFunctionBegin; 2023 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE)); 2024 PetscFunctionReturn(0); 2025 } 2026 2027 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2028 { 2029 PetscFunctionBegin; 2030 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE)); 2031 PetscFunctionReturn(0); 2032 } 2033 2034 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2035 { 2036 PC_IS *pcis = (PC_IS*)pc->data; 2037 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2038 PCBDDCBenignMatMult_ctx ctx; 2039 2040 PetscFunctionBegin; 2041 if (!restore) { 2042 Mat A_IB,A_BI; 2043 PetscScalar *work; 2044 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2045 2046 PetscCheck(!pcbddc->benign_original_mat,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2047 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2048 PetscCall(PetscMalloc1(pcis->n,&work)); 2049 PetscCall(MatCreate(PETSC_COMM_SELF,&A_IB)); 2050 PetscCall(MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE)); 2051 PetscCall(MatSetType(A_IB,MATSHELL)); 2052 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private)); 2053 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2054 PetscCall(PetscNew(&ctx)); 2055 PetscCall(MatShellSetContext(A_IB,ctx)); 2056 ctx->apply_left = PETSC_TRUE; 2057 ctx->apply_right = PETSC_FALSE; 2058 ctx->apply_p0 = PETSC_FALSE; 2059 ctx->benign_n = pcbddc->benign_n; 2060 if (reuse) { 2061 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2062 ctx->free = PETSC_FALSE; 2063 } else { /* TODO: could be optimized for successive solves */ 2064 ISLocalToGlobalMapping N_to_D; 2065 PetscInt i; 2066 2067 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D)); 2068 PetscCall(PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs)); 2069 for (i=0;i<pcbddc->benign_n;i++) { 2070 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i])); 2071 } 2072 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2073 ctx->free = PETSC_TRUE; 2074 } 2075 ctx->A = pcis->A_IB; 2076 ctx->work = work; 2077 PetscCall(MatSetUp(A_IB)); 2078 PetscCall(MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY)); 2079 PetscCall(MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY)); 2080 pcis->A_IB = A_IB; 2081 2082 /* A_BI as A_IB^T */ 2083 PetscCall(MatCreateTranspose(A_IB,&A_BI)); 2084 pcbddc->benign_original_mat = pcis->A_BI; 2085 pcis->A_BI = A_BI; 2086 } else { 2087 if (!pcbddc->benign_original_mat) { 2088 PetscFunctionReturn(0); 2089 } 2090 PetscCall(MatShellGetContext(pcis->A_IB,&ctx)); 2091 PetscCall(MatDestroy(&pcis->A_IB)); 2092 pcis->A_IB = ctx->A; 2093 ctx->A = NULL; 2094 PetscCall(MatDestroy(&pcis->A_BI)); 2095 pcis->A_BI = pcbddc->benign_original_mat; 2096 pcbddc->benign_original_mat = NULL; 2097 if (ctx->free) { 2098 PetscInt i; 2099 for (i=0;i<ctx->benign_n;i++) { 2100 PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2101 } 2102 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2103 } 2104 PetscCall(PetscFree(ctx->work)); 2105 PetscCall(PetscFree(ctx)); 2106 } 2107 PetscFunctionReturn(0); 2108 } 2109 2110 /* used just in bddc debug mode */ 2111 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2112 { 2113 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2114 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2115 Mat An; 2116 2117 PetscFunctionBegin; 2118 PetscCall(MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An)); 2119 PetscCall(MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL)); 2120 if (is1) { 2121 PetscCall(MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B)); 2122 PetscCall(MatDestroy(&An)); 2123 } else { 2124 *B = An; 2125 } 2126 PetscFunctionReturn(0); 2127 } 2128 2129 /* TODO: add reuse flag */ 2130 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2131 { 2132 Mat Bt; 2133 PetscScalar *a,*bdata; 2134 const PetscInt *ii,*ij; 2135 PetscInt m,n,i,nnz,*bii,*bij; 2136 PetscBool flg_row; 2137 2138 PetscFunctionBegin; 2139 PetscCall(MatGetSize(A,&n,&m)); 2140 PetscCall(MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2141 PetscCall(MatSeqAIJGetArray(A,&a)); 2142 nnz = n; 2143 for (i=0;i<ii[n];i++) { 2144 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2145 } 2146 PetscCall(PetscMalloc1(n+1,&bii)); 2147 PetscCall(PetscMalloc1(nnz,&bij)); 2148 PetscCall(PetscMalloc1(nnz,&bdata)); 2149 nnz = 0; 2150 bii[0] = 0; 2151 for (i=0;i<n;i++) { 2152 PetscInt j; 2153 for (j=ii[i];j<ii[i+1];j++) { 2154 PetscScalar entry = a[j]; 2155 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2156 bij[nnz] = ij[j]; 2157 bdata[nnz] = entry; 2158 nnz++; 2159 } 2160 } 2161 bii[i+1] = nnz; 2162 } 2163 PetscCall(MatSeqAIJRestoreArray(A,&a)); 2164 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt)); 2165 PetscCall(MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2166 { 2167 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2168 b->free_a = PETSC_TRUE; 2169 b->free_ij = PETSC_TRUE; 2170 } 2171 if (*B == A) { 2172 PetscCall(MatDestroy(&A)); 2173 } 2174 *B = Bt; 2175 PetscFunctionReturn(0); 2176 } 2177 2178 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2179 { 2180 Mat B = NULL; 2181 DM dm; 2182 IS is_dummy,*cc_n; 2183 ISLocalToGlobalMapping l2gmap_dummy; 2184 PCBDDCGraph graph; 2185 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2186 PetscInt i,n; 2187 PetscInt *xadj,*adjncy; 2188 PetscBool isplex = PETSC_FALSE; 2189 2190 PetscFunctionBegin; 2191 if (ncc) *ncc = 0; 2192 if (cc) *cc = NULL; 2193 if (primalv) *primalv = NULL; 2194 PetscCall(PCBDDCGraphCreate(&graph)); 2195 PetscCall(MatGetDM(pc->pmat,&dm)); 2196 if (!dm) { 2197 PetscCall(PCGetDM(pc,&dm)); 2198 } 2199 if (dm) { 2200 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex)); 2201 } 2202 if (filter) isplex = PETSC_FALSE; 2203 2204 if (isplex) { /* this code has been modified from plexpartition.c */ 2205 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2206 PetscInt *adj = NULL; 2207 IS cellNumbering; 2208 const PetscInt *cellNum; 2209 PetscBool useCone, useClosure; 2210 PetscSection section; 2211 PetscSegBuffer adjBuffer; 2212 PetscSF sfPoint; 2213 2214 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2215 PetscCall(DMGetPointSF(dm, &sfPoint)); 2216 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2217 /* Build adjacency graph via a section/segbuffer */ 2218 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion)); 2219 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2220 PetscCall(PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer)); 2221 /* Always use FVM adjacency to create partitioner graph */ 2222 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2223 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2224 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2225 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2226 for (n = 0, p = pStart; p < pEnd; p++) { 2227 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2228 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2229 adjSize = PETSC_DETERMINE; 2230 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2231 for (a = 0; a < adjSize; ++a) { 2232 const PetscInt point = adj[a]; 2233 if (pStart <= point && point < pEnd) { 2234 PetscInt *PETSC_RESTRICT pBuf; 2235 PetscCall(PetscSectionAddDof(section, p, 1)); 2236 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2237 *pBuf = point; 2238 } 2239 } 2240 n++; 2241 } 2242 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2243 /* Derive CSR graph from section/segbuffer */ 2244 PetscCall(PetscSectionSetUp(section)); 2245 PetscCall(PetscSectionGetStorageSize(section, &size)); 2246 PetscCall(PetscMalloc1(n+1, &xadj)); 2247 for (idx = 0, p = pStart; p < pEnd; p++) { 2248 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2249 PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++]))); 2250 } 2251 xadj[n] = size; 2252 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2253 /* Clean up */ 2254 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2255 PetscCall(PetscSectionDestroy(§ion)); 2256 PetscCall(PetscFree(adj)); 2257 graph->xadj = xadj; 2258 graph->adjncy = adjncy; 2259 } else { 2260 Mat A; 2261 PetscBool isseqaij, flg_row; 2262 2263 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2264 if (!A->rmap->N || !A->cmap->N) { 2265 PetscCall(PCBDDCGraphDestroy(&graph)); 2266 PetscFunctionReturn(0); 2267 } 2268 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij)); 2269 if (!isseqaij && filter) { 2270 PetscBool isseqdense; 2271 2272 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense)); 2273 if (!isseqdense) { 2274 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 2275 } else { /* TODO: rectangular case and LDA */ 2276 PetscScalar *array; 2277 PetscReal chop=1.e-6; 2278 2279 PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 2280 PetscCall(MatDenseGetArray(B,&array)); 2281 PetscCall(MatGetSize(B,&n,NULL)); 2282 for (i=0;i<n;i++) { 2283 PetscInt j; 2284 for (j=i+1;j<n;j++) { 2285 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2286 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2287 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2288 } 2289 } 2290 PetscCall(MatDenseRestoreArray(B,&array)); 2291 PetscCall(MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B)); 2292 } 2293 } else { 2294 PetscCall(PetscObjectReference((PetscObject)A)); 2295 B = A; 2296 } 2297 PetscCall(MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2298 2299 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2300 if (filter) { 2301 PetscScalar *data; 2302 PetscInt j,cum; 2303 2304 PetscCall(PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered)); 2305 PetscCall(MatSeqAIJGetArray(B,&data)); 2306 cum = 0; 2307 for (i=0;i<n;i++) { 2308 PetscInt t; 2309 2310 for (j=xadj[i];j<xadj[i+1];j++) { 2311 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2312 continue; 2313 } 2314 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2315 } 2316 t = xadj_filtered[i]; 2317 xadj_filtered[i] = cum; 2318 cum += t; 2319 } 2320 PetscCall(MatSeqAIJRestoreArray(B,&data)); 2321 graph->xadj = xadj_filtered; 2322 graph->adjncy = adjncy_filtered; 2323 } else { 2324 graph->xadj = xadj; 2325 graph->adjncy = adjncy; 2326 } 2327 } 2328 /* compute local connected components using PCBDDCGraph */ 2329 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy)); 2330 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy)); 2331 PetscCall(ISDestroy(&is_dummy)); 2332 PetscCall(PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT)); 2333 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2334 PetscCall(PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL)); 2335 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2336 2337 /* partial clean up */ 2338 PetscCall(PetscFree2(xadj_filtered,adjncy_filtered)); 2339 if (B) { 2340 PetscBool flg_row; 2341 PetscCall(MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2342 PetscCall(MatDestroy(&B)); 2343 } 2344 if (isplex) { 2345 PetscCall(PetscFree(xadj)); 2346 PetscCall(PetscFree(adjncy)); 2347 } 2348 2349 /* get back data */ 2350 if (isplex) { 2351 if (ncc) *ncc = graph->ncc; 2352 if (cc || primalv) { 2353 Mat A; 2354 PetscBT btv,btvt; 2355 PetscSection subSection; 2356 PetscInt *ids,cum,cump,*cids,*pids; 2357 2358 PetscCall(DMPlexGetSubdomainSection(dm,&subSection)); 2359 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2360 PetscCall(PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids)); 2361 PetscCall(PetscBTCreate(A->rmap->n,&btv)); 2362 PetscCall(PetscBTCreate(A->rmap->n,&btvt)); 2363 2364 cids[0] = 0; 2365 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2366 PetscInt j; 2367 2368 PetscCall(PetscBTMemzero(A->rmap->n,btvt)); 2369 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2370 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2371 2372 PetscCall(DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2373 for (k = 0; k < 2*size; k += 2) { 2374 PetscInt s, pp, p = closure[k], off, dof, cdof; 2375 2376 PetscCall(PetscSectionGetConstraintDof(subSection,p,&cdof)); 2377 PetscCall(PetscSectionGetOffset(subSection,p,&off)); 2378 PetscCall(PetscSectionGetDof(subSection,p,&dof)); 2379 for (s = 0; s < dof-cdof; s++) { 2380 if (PetscBTLookupSet(btvt,off+s)) continue; 2381 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2382 else pids[cump++] = off+s; /* cross-vertex */ 2383 } 2384 PetscCall(DMPlexGetTreeParent(dm,p,&pp,NULL)); 2385 if (pp != p) { 2386 PetscCall(PetscSectionGetConstraintDof(subSection,pp,&cdof)); 2387 PetscCall(PetscSectionGetOffset(subSection,pp,&off)); 2388 PetscCall(PetscSectionGetDof(subSection,pp,&dof)); 2389 for (s = 0; s < dof-cdof; s++) { 2390 if (PetscBTLookupSet(btvt,off+s)) continue; 2391 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2392 else pids[cump++] = off+s; /* cross-vertex */ 2393 } 2394 } 2395 } 2396 PetscCall(DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2397 } 2398 cids[i+1] = cum; 2399 /* mark dofs as already assigned */ 2400 for (j = cids[i]; j < cids[i+1]; j++) { 2401 PetscCall(PetscBTSet(btv,ids[j])); 2402 } 2403 } 2404 if (cc) { 2405 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2406 for (i = 0; i < graph->ncc; i++) { 2407 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i])); 2408 } 2409 *cc = cc_n; 2410 } 2411 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv)); 2412 PetscCall(PetscFree3(ids,cids,pids)); 2413 PetscCall(PetscBTDestroy(&btv)); 2414 PetscCall(PetscBTDestroy(&btvt)); 2415 } 2416 } else { 2417 if (ncc) *ncc = graph->ncc; 2418 if (cc) { 2419 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2420 for (i=0;i<graph->ncc;i++) { 2421 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i])); 2422 } 2423 *cc = cc_n; 2424 } 2425 } 2426 /* clean up graph */ 2427 graph->xadj = NULL; 2428 graph->adjncy = NULL; 2429 PetscCall(PCBDDCGraphDestroy(&graph)); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2434 { 2435 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2436 PC_IS* pcis = (PC_IS*)(pc->data); 2437 IS dirIS = NULL; 2438 PetscInt i; 2439 2440 PetscFunctionBegin; 2441 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS)); 2442 if (zerodiag) { 2443 Mat A; 2444 Vec vec3_N; 2445 PetscScalar *vals; 2446 const PetscInt *idxs; 2447 PetscInt nz,*count; 2448 2449 /* p0 */ 2450 PetscCall(VecSet(pcis->vec1_N,0.)); 2451 PetscCall(PetscMalloc1(pcis->n,&vals)); 2452 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2453 PetscCall(ISGetIndices(zerodiag,&idxs)); 2454 for (i=0;i<nz;i++) vals[i] = 1.; 2455 PetscCall(VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES)); 2456 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2457 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2458 /* v_I */ 2459 PetscCall(VecSetRandom(pcis->vec2_N,NULL)); 2460 for (i=0;i<nz;i++) vals[i] = 0.; 2461 PetscCall(VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES)); 2462 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2463 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2464 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2465 PetscCall(VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES)); 2466 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2467 if (dirIS) { 2468 PetscInt n; 2469 2470 PetscCall(ISGetLocalSize(dirIS,&n)); 2471 PetscCall(ISGetIndices(dirIS,&idxs)); 2472 for (i=0;i<n;i++) vals[i] = 0.; 2473 PetscCall(VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES)); 2474 PetscCall(ISRestoreIndices(dirIS,&idxs)); 2475 } 2476 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2477 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2478 PetscCall(VecDuplicate(pcis->vec1_N,&vec3_N)); 2479 PetscCall(VecSet(vec3_N,0.)); 2480 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2481 PetscCall(MatMult(A,pcis->vec1_N,vec3_N)); 2482 PetscCall(VecDot(vec3_N,pcis->vec2_N,&vals[0])); 2483 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",(double)PetscAbsScalar(vals[0])); 2484 PetscCall(PetscFree(vals)); 2485 PetscCall(VecDestroy(&vec3_N)); 2486 2487 /* there should not be any pressure dofs lying on the interface */ 2488 PetscCall(PetscCalloc1(pcis->n,&count)); 2489 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2490 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2491 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2492 PetscCall(ISGetIndices(zerodiag,&idxs)); 2493 for (i=0;i<nz;i++) PetscCheck(!count[idxs[i]],PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof",idxs[i]); 2494 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2495 PetscCall(PetscFree(count)); 2496 } 2497 PetscCall(ISDestroy(&dirIS)); 2498 2499 /* check PCBDDCBenignGetOrSetP0 */ 2500 PetscCall(VecSetRandom(pcis->vec1_global,NULL)); 2501 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2502 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE)); 2503 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2504 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE)); 2505 for (i=0;i<pcbddc->benign_n;i++) { 2506 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2507 PetscCheck(val == -PetscGlobalRank-i,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g",(double)PetscRealPart(pcbddc->benign_p0[i]),i,(double)(-PetscGlobalRank-i)); 2508 } 2509 PetscFunctionReturn(0); 2510 } 2511 2512 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2513 { 2514 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2515 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2516 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2517 PetscInt nz,n,benign_n,bsp = 1; 2518 PetscInt *interior_dofs,n_interior_dofs,nneu; 2519 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2520 2521 PetscFunctionBegin; 2522 if (reuse) goto project_b0; 2523 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2524 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2525 for (n=0;n<pcbddc->benign_n;n++) { 2526 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2527 } 2528 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2529 has_null_pressures = PETSC_TRUE; 2530 have_null = PETSC_TRUE; 2531 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2532 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2533 Checks if all the pressure dofs in each subdomain have a zero diagonal 2534 If not, a change of basis on pressures is not needed 2535 since the local Schur complements are already SPD 2536 */ 2537 if (pcbddc->n_ISForDofsLocal) { 2538 IS iP = NULL; 2539 PetscInt p,*pp; 2540 PetscBool flg; 2541 2542 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp)); 2543 n = pcbddc->n_ISForDofsLocal; 2544 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC"); 2545 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg)); 2546 PetscOptionsEnd(); 2547 if (!flg) { 2548 n = 1; 2549 pp[0] = pcbddc->n_ISForDofsLocal-1; 2550 } 2551 2552 bsp = 0; 2553 for (p=0;p<n;p++) { 2554 PetscInt bs; 2555 2556 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal,PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %" PetscInt_FMT,pp[p]); 2557 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2558 bsp += bs; 2559 } 2560 PetscCall(PetscMalloc1(bsp,&bzerodiag)); 2561 bsp = 0; 2562 for (p=0;p<n;p++) { 2563 const PetscInt *idxs; 2564 PetscInt b,bs,npl,*bidxs; 2565 2566 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2567 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl)); 2568 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2569 PetscCall(PetscMalloc1(npl/bs,&bidxs)); 2570 for (b=0;b<bs;b++) { 2571 PetscInt i; 2572 2573 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2574 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp])); 2575 bsp++; 2576 } 2577 PetscCall(PetscFree(bidxs)); 2578 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2579 } 2580 PetscCall(ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures)); 2581 2582 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2583 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP)); 2584 if (iP) { 2585 IS newpressures; 2586 2587 PetscCall(ISDifference(pressures,iP,&newpressures)); 2588 PetscCall(ISDestroy(&pressures)); 2589 pressures = newpressures; 2590 } 2591 PetscCall(ISSorted(pressures,&sorted)); 2592 if (!sorted) { 2593 PetscCall(ISSort(pressures)); 2594 } 2595 PetscCall(PetscFree(pp)); 2596 } 2597 2598 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2599 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2600 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2601 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag)); 2602 PetscCall(ISSorted(zerodiag,&sorted)); 2603 if (!sorted) { 2604 PetscCall(ISSort(zerodiag)); 2605 } 2606 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2607 zerodiag_save = zerodiag; 2608 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2609 if (!nz) { 2610 if (n) have_null = PETSC_FALSE; 2611 has_null_pressures = PETSC_FALSE; 2612 PetscCall(ISDestroy(&zerodiag)); 2613 } 2614 recompute_zerodiag = PETSC_FALSE; 2615 2616 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2617 zerodiag_subs = NULL; 2618 benign_n = 0; 2619 n_interior_dofs = 0; 2620 interior_dofs = NULL; 2621 nneu = 0; 2622 if (pcbddc->NeumannBoundariesLocal) { 2623 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu)); 2624 } 2625 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2626 if (checkb) { /* need to compute interior nodes */ 2627 PetscInt n,i,j; 2628 PetscInt n_neigh,*neigh,*n_shared,**shared; 2629 PetscInt *iwork; 2630 2631 PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping,&n)); 2632 PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2633 PetscCall(PetscCalloc1(n,&iwork)); 2634 PetscCall(PetscMalloc1(n,&interior_dofs)); 2635 for (i=1;i<n_neigh;i++) 2636 for (j=0;j<n_shared[i];j++) 2637 iwork[shared[i][j]] += 1; 2638 for (i=0;i<n;i++) 2639 if (!iwork[i]) 2640 interior_dofs[n_interior_dofs++] = i; 2641 PetscCall(PetscFree(iwork)); 2642 PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2643 } 2644 if (has_null_pressures) { 2645 IS *subs; 2646 PetscInt nsubs,i,j,nl; 2647 const PetscInt *idxs; 2648 PetscScalar *array; 2649 Vec *work; 2650 2651 subs = pcbddc->local_subs; 2652 nsubs = pcbddc->n_local_subs; 2653 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2654 if (checkb) { 2655 PetscCall(VecDuplicateVecs(matis->y,2,&work)); 2656 PetscCall(ISGetLocalSize(zerodiag,&nl)); 2657 PetscCall(ISGetIndices(zerodiag,&idxs)); 2658 /* work[0] = 1_p */ 2659 PetscCall(VecSet(work[0],0.)); 2660 PetscCall(VecGetArray(work[0],&array)); 2661 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2662 PetscCall(VecRestoreArray(work[0],&array)); 2663 /* work[0] = 1_v */ 2664 PetscCall(VecSet(work[1],1.)); 2665 PetscCall(VecGetArray(work[1],&array)); 2666 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2667 PetscCall(VecRestoreArray(work[1],&array)); 2668 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2669 } 2670 2671 if (nsubs > 1 || bsp > 1) { 2672 IS *is; 2673 PetscInt b,totb; 2674 2675 totb = bsp; 2676 is = bsp > 1 ? bzerodiag : &zerodiag; 2677 nsubs = PetscMax(nsubs,1); 2678 PetscCall(PetscCalloc1(nsubs*totb,&zerodiag_subs)); 2679 for (b=0;b<totb;b++) { 2680 for (i=0;i<nsubs;i++) { 2681 ISLocalToGlobalMapping l2g; 2682 IS t_zerodiag_subs; 2683 PetscInt nl; 2684 2685 if (subs) { 2686 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i],&l2g)); 2687 } else { 2688 IS tis; 2689 2690 PetscCall(MatGetLocalSize(pcbddc->local_mat,&nl,NULL)); 2691 PetscCall(ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis)); 2692 PetscCall(ISLocalToGlobalMappingCreateIS(tis,&l2g)); 2693 PetscCall(ISDestroy(&tis)); 2694 } 2695 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs)); 2696 PetscCall(ISGetLocalSize(t_zerodiag_subs,&nl)); 2697 if (nl) { 2698 PetscBool valid = PETSC_TRUE; 2699 2700 if (checkb) { 2701 PetscCall(VecSet(matis->x,0)); 2702 PetscCall(ISGetLocalSize(subs[i],&nl)); 2703 PetscCall(ISGetIndices(subs[i],&idxs)); 2704 PetscCall(VecGetArray(matis->x,&array)); 2705 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2706 PetscCall(VecRestoreArray(matis->x,&array)); 2707 PetscCall(ISRestoreIndices(subs[i],&idxs)); 2708 PetscCall(VecPointwiseMult(matis->x,work[0],matis->x)); 2709 PetscCall(MatMult(matis->A,matis->x,matis->y)); 2710 PetscCall(VecPointwiseMult(matis->y,work[1],matis->y)); 2711 PetscCall(VecGetArray(matis->y,&array)); 2712 for (j=0;j<n_interior_dofs;j++) { 2713 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2714 valid = PETSC_FALSE; 2715 break; 2716 } 2717 } 2718 PetscCall(VecRestoreArray(matis->y,&array)); 2719 } 2720 if (valid && nneu) { 2721 const PetscInt *idxs; 2722 PetscInt nzb; 2723 2724 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2725 PetscCall(ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL)); 2726 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2727 if (nzb) valid = PETSC_FALSE; 2728 } 2729 if (valid && pressures) { 2730 IS t_pressure_subs,tmp; 2731 PetscInt i1,i2; 2732 2733 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs)); 2734 PetscCall(ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp)); 2735 PetscCall(ISGetLocalSize(tmp,&i1)); 2736 PetscCall(ISGetLocalSize(t_zerodiag_subs,&i2)); 2737 if (i2 != i1) valid = PETSC_FALSE; 2738 PetscCall(ISDestroy(&t_pressure_subs)); 2739 PetscCall(ISDestroy(&tmp)); 2740 } 2741 if (valid) { 2742 PetscCall(ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n])); 2743 benign_n++; 2744 } else recompute_zerodiag = PETSC_TRUE; 2745 } 2746 PetscCall(ISDestroy(&t_zerodiag_subs)); 2747 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2748 } 2749 } 2750 } else { /* there's just one subdomain (or zero if they have not been detected */ 2751 PetscBool valid = PETSC_TRUE; 2752 2753 if (nneu) valid = PETSC_FALSE; 2754 if (valid && pressures) { 2755 PetscCall(ISEqual(pressures,zerodiag,&valid)); 2756 } 2757 if (valid && checkb) { 2758 PetscCall(MatMult(matis->A,work[0],matis->x)); 2759 PetscCall(VecPointwiseMult(matis->x,work[1],matis->x)); 2760 PetscCall(VecGetArray(matis->x,&array)); 2761 for (j=0;j<n_interior_dofs;j++) { 2762 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2763 valid = PETSC_FALSE; 2764 break; 2765 } 2766 } 2767 PetscCall(VecRestoreArray(matis->x,&array)); 2768 } 2769 if (valid) { 2770 benign_n = 1; 2771 PetscCall(PetscMalloc1(benign_n,&zerodiag_subs)); 2772 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2773 zerodiag_subs[0] = zerodiag; 2774 } 2775 } 2776 if (checkb) { 2777 PetscCall(VecDestroyVecs(2,&work)); 2778 } 2779 } 2780 PetscCall(PetscFree(interior_dofs)); 2781 2782 if (!benign_n) { 2783 PetscInt n; 2784 2785 PetscCall(ISDestroy(&zerodiag)); 2786 recompute_zerodiag = PETSC_FALSE; 2787 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2788 if (n) have_null = PETSC_FALSE; 2789 } 2790 2791 /* final check for null pressures */ 2792 if (zerodiag && pressures) { 2793 PetscCall(ISEqual(pressures,zerodiag,&have_null)); 2794 } 2795 2796 if (recompute_zerodiag) { 2797 PetscCall(ISDestroy(&zerodiag)); 2798 if (benign_n == 1) { 2799 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2800 zerodiag = zerodiag_subs[0]; 2801 } else { 2802 PetscInt i,nzn,*new_idxs; 2803 2804 nzn = 0; 2805 for (i=0;i<benign_n;i++) { 2806 PetscInt ns; 2807 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2808 nzn += ns; 2809 } 2810 PetscCall(PetscMalloc1(nzn,&new_idxs)); 2811 nzn = 0; 2812 for (i=0;i<benign_n;i++) { 2813 PetscInt ns,*idxs; 2814 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2815 PetscCall(ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2816 PetscCall(PetscArraycpy(new_idxs+nzn,idxs,ns)); 2817 PetscCall(ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2818 nzn += ns; 2819 } 2820 PetscCall(PetscSortInt(nzn,new_idxs)); 2821 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag)); 2822 } 2823 have_null = PETSC_FALSE; 2824 } 2825 2826 /* determines if the coarse solver will be singular or not */ 2827 PetscCall(MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 2828 2829 /* Prepare matrix to compute no-net-flux */ 2830 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2831 Mat A,loc_divudotp; 2832 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2833 IS row,col,isused = NULL; 2834 PetscInt M,N,n,st,n_isused; 2835 2836 if (pressures) { 2837 isused = pressures; 2838 } else { 2839 isused = zerodiag_save; 2840 } 2841 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL)); 2842 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2843 PetscCall(MatGetLocalSize(A,&n,NULL)); 2844 PetscCheck(isused || (n == 0),PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2845 n_isused = 0; 2846 if (isused) { 2847 PetscCall(ISGetLocalSize(isused,&n_isused)); 2848 } 2849 PetscCallMPI(MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 2850 st = st-n_isused; 2851 if (n) { 2852 const PetscInt *gidxs; 2853 2854 PetscCall(MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp)); 2855 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 2856 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2857 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2858 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col)); 2859 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 2860 } else { 2861 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp)); 2862 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2863 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col)); 2864 } 2865 PetscCall(MatGetSize(pc->pmat,NULL,&N)); 2866 PetscCall(ISGetSize(row,&M)); 2867 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 2868 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 2869 PetscCall(ISDestroy(&row)); 2870 PetscCall(ISDestroy(&col)); 2871 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp)); 2872 PetscCall(MatSetType(pcbddc->divudotp,MATIS)); 2873 PetscCall(MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N)); 2874 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g)); 2875 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2876 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2877 PetscCall(MatISSetLocalMat(pcbddc->divudotp,loc_divudotp)); 2878 PetscCall(MatDestroy(&loc_divudotp)); 2879 PetscCall(MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2880 PetscCall(MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2881 } 2882 PetscCall(ISDestroy(&zerodiag_save)); 2883 PetscCall(ISDestroy(&pressures)); 2884 if (bzerodiag) { 2885 PetscInt i; 2886 2887 for (i=0;i<bsp;i++) { 2888 PetscCall(ISDestroy(&bzerodiag[i])); 2889 } 2890 PetscCall(PetscFree(bzerodiag)); 2891 } 2892 pcbddc->benign_n = benign_n; 2893 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2894 2895 /* determines if the problem has subdomains with 0 pressure block */ 2896 have_null = (PetscBool)(!!pcbddc->benign_n); 2897 PetscCall(MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 2898 2899 project_b0: 2900 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2901 /* change of basis and p0 dofs */ 2902 if (pcbddc->benign_n) { 2903 PetscInt i,s,*nnz; 2904 2905 /* local change of basis for pressures */ 2906 PetscCall(MatDestroy(&pcbddc->benign_change)); 2907 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change)); 2908 PetscCall(MatSetType(pcbddc->benign_change,MATAIJ)); 2909 PetscCall(MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE)); 2910 PetscCall(PetscMalloc1(n,&nnz)); 2911 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2912 for (i=0;i<pcbddc->benign_n;i++) { 2913 const PetscInt *idxs; 2914 PetscInt nzs,j; 2915 2916 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs)); 2917 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2918 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2919 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2920 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2921 } 2922 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz)); 2923 PetscCall(MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 2924 PetscCall(PetscFree(nnz)); 2925 /* set identity by default */ 2926 for (i=0;i<n;i++) { 2927 PetscCall(MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES)); 2928 } 2929 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 2930 PetscCall(PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0)); 2931 /* set change on pressures */ 2932 for (s=0;s<pcbddc->benign_n;s++) { 2933 PetscScalar *array; 2934 const PetscInt *idxs; 2935 PetscInt nzs; 2936 2937 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs)); 2938 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2939 for (i=0;i<nzs-1;i++) { 2940 PetscScalar vals[2]; 2941 PetscInt cols[2]; 2942 2943 cols[0] = idxs[i]; 2944 cols[1] = idxs[nzs-1]; 2945 vals[0] = 1.; 2946 vals[1] = 1.; 2947 PetscCall(MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES)); 2948 } 2949 PetscCall(PetscMalloc1(nzs,&array)); 2950 for (i=0;i<nzs-1;i++) array[i] = -1.; 2951 array[nzs-1] = 1.; 2952 PetscCall(MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES)); 2953 /* store local idxs for p0 */ 2954 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2955 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2956 PetscCall(PetscFree(array)); 2957 } 2958 PetscCall(MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2959 PetscCall(MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2960 2961 /* project if needed */ 2962 if (pcbddc->benign_change_explicit) { 2963 Mat M; 2964 2965 PetscCall(MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M)); 2966 PetscCall(MatDestroy(&pcbddc->local_mat)); 2967 PetscCall(MatSeqAIJCompress(M,&pcbddc->local_mat)); 2968 PetscCall(MatDestroy(&M)); 2969 } 2970 /* store global idxs for p0 */ 2971 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx)); 2972 } 2973 *zerodiaglocal = zerodiag; 2974 PetscFunctionReturn(0); 2975 } 2976 2977 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2978 { 2979 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2980 PetscScalar *array; 2981 2982 PetscFunctionBegin; 2983 if (!pcbddc->benign_sf) { 2984 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf)); 2985 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx)); 2986 } 2987 if (get) { 2988 PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array)); 2989 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 2990 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 2991 PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 2992 } else { 2993 PetscCall(VecGetArray(v,&array)); 2994 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 2995 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 2996 PetscCall(VecRestoreArray(v,&array)); 2997 } 2998 PetscFunctionReturn(0); 2999 } 3000 3001 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3002 { 3003 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3004 3005 PetscFunctionBegin; 3006 /* TODO: add error checking 3007 - avoid nested pop (or push) calls. 3008 - cannot push before pop. 3009 - cannot call this if pcbddc->local_mat is NULL 3010 */ 3011 if (!pcbddc->benign_n) { 3012 PetscFunctionReturn(0); 3013 } 3014 if (pop) { 3015 if (pcbddc->benign_change_explicit) { 3016 IS is_p0; 3017 MatReuse reuse; 3018 3019 /* extract B_0 */ 3020 reuse = MAT_INITIAL_MATRIX; 3021 if (pcbddc->benign_B0) { 3022 reuse = MAT_REUSE_MATRIX; 3023 } 3024 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0)); 3025 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0)); 3026 /* remove rows and cols from local problem */ 3027 PetscCall(MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE)); 3028 PetscCall(MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 3029 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL)); 3030 PetscCall(ISDestroy(&is_p0)); 3031 } else { 3032 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3033 PetscScalar *vals; 3034 PetscInt i,n,*idxs_ins; 3035 3036 PetscCall(VecGetLocalSize(matis->y,&n)); 3037 PetscCall(PetscMalloc2(n,&idxs_ins,n,&vals)); 3038 if (!pcbddc->benign_B0) { 3039 PetscInt *nnz; 3040 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0)); 3041 PetscCall(MatSetType(pcbddc->benign_B0,MATAIJ)); 3042 PetscCall(MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE)); 3043 PetscCall(PetscMalloc1(pcbddc->benign_n,&nnz)); 3044 for (i=0;i<pcbddc->benign_n;i++) { 3045 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i])); 3046 nnz[i] = n - nnz[i]; 3047 } 3048 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz)); 3049 PetscCall(MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 3050 PetscCall(PetscFree(nnz)); 3051 } 3052 3053 for (i=0;i<pcbddc->benign_n;i++) { 3054 PetscScalar *array; 3055 PetscInt *idxs,j,nz,cum; 3056 3057 PetscCall(VecSet(matis->x,0.)); 3058 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz)); 3059 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3060 for (j=0;j<nz;j++) vals[j] = 1.; 3061 PetscCall(VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES)); 3062 PetscCall(VecAssemblyBegin(matis->x)); 3063 PetscCall(VecAssemblyEnd(matis->x)); 3064 PetscCall(VecSet(matis->y,0.)); 3065 PetscCall(MatMult(matis->A,matis->x,matis->y)); 3066 PetscCall(VecGetArray(matis->y,&array)); 3067 cum = 0; 3068 for (j=0;j<n;j++) { 3069 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3070 vals[cum] = array[j]; 3071 idxs_ins[cum] = j; 3072 cum++; 3073 } 3074 } 3075 PetscCall(MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES)); 3076 PetscCall(VecRestoreArray(matis->y,&array)); 3077 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3078 } 3079 PetscCall(MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3080 PetscCall(MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3081 PetscCall(PetscFree2(idxs_ins,vals)); 3082 } 3083 } else { /* push */ 3084 if (pcbddc->benign_change_explicit) { 3085 PetscInt i; 3086 3087 for (i=0;i<pcbddc->benign_n;i++) { 3088 PetscScalar *B0_vals; 3089 PetscInt *B0_cols,B0_ncol; 3090 3091 PetscCall(MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3092 PetscCall(MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES)); 3093 PetscCall(MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES)); 3094 PetscCall(MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES)); 3095 PetscCall(MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3096 } 3097 PetscCall(MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3098 PetscCall(MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3099 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3100 } 3101 PetscFunctionReturn(0); 3102 } 3103 3104 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3105 { 3106 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3107 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3108 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3109 PetscBLASInt *B_iwork,*B_ifail; 3110 PetscScalar *work,lwork; 3111 PetscScalar *St,*S,*eigv; 3112 PetscScalar *Sarray,*Starray; 3113 PetscReal *eigs,thresh,lthresh,uthresh; 3114 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3115 PetscBool allocated_S_St,upart; 3116 #if defined(PETSC_USE_COMPLEX) 3117 PetscReal *rwork; 3118 #endif 3119 3120 PetscFunctionBegin; 3121 if (!pcbddc->adaptive_selection) PetscFunctionReturn(0); 3122 PetscCheck(sub_schurs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3123 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3124 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3125 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3126 3127 if (pcbddc->dbg_flag) { 3128 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3129 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3130 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 3131 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n")); 3132 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3133 } 3134 3135 if (pcbddc->dbg_flag) { 3136 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef)); 3137 } 3138 3139 /* max size of subsets */ 3140 mss = 0; 3141 for (i=0;i<sub_schurs->n_subs;i++) { 3142 PetscInt subset_size; 3143 3144 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3145 mss = PetscMax(mss,subset_size); 3146 } 3147 3148 /* min/max and threshold */ 3149 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3150 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3151 nmax = PetscMax(nmin,nmax); 3152 allocated_S_St = PETSC_FALSE; 3153 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3154 allocated_S_St = PETSC_TRUE; 3155 } 3156 3157 /* allocate lapack workspace */ 3158 cum = cum2 = 0; 3159 maxneigs = 0; 3160 for (i=0;i<sub_schurs->n_subs;i++) { 3161 PetscInt n,subset_size; 3162 3163 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3164 n = PetscMin(subset_size,nmax); 3165 cum += subset_size; 3166 cum2 += subset_size*n; 3167 maxneigs = PetscMax(maxneigs,n); 3168 } 3169 lwork = 0; 3170 if (mss) { 3171 if (sub_schurs->is_symmetric) { 3172 PetscScalar sdummy = 0.; 3173 PetscBLASInt B_itype = 1; 3174 PetscBLASInt B_N = mss, idummy = 0; 3175 PetscReal rdummy = 0.,zero = 0.0; 3176 PetscReal eps = 0.0; /* dlamch? */ 3177 3178 B_lwork = -1; 3179 /* some implementations may complain about NULL pointers, even if we are querying */ 3180 S = &sdummy; 3181 St = &sdummy; 3182 eigs = &rdummy; 3183 eigv = &sdummy; 3184 B_iwork = &idummy; 3185 B_ifail = &idummy; 3186 #if defined(PETSC_USE_COMPLEX) 3187 rwork = &rdummy; 3188 #endif 3189 thresh = 1.0; 3190 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3191 #if defined(PETSC_USE_COMPLEX) 3192 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3193 #else 3194 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3195 #endif 3196 PetscCheck(B_ierr == 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3197 PetscCall(PetscFPTrapPop()); 3198 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3199 } 3200 3201 nv = 0; 3202 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3203 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&nv)); 3204 } 3205 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork)); 3206 if (allocated_S_St) { 3207 PetscCall(PetscMalloc2(mss*mss,&S,mss*mss,&St)); 3208 } 3209 PetscCall(PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail)); 3210 #if defined(PETSC_USE_COMPLEX) 3211 PetscCall(PetscMalloc1(7*mss,&rwork)); 3212 #endif 3213 PetscCall(PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3214 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3215 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3216 nv+cum,&pcbddc->adaptive_constraints_idxs, 3217 nv+cum2,&pcbddc->adaptive_constraints_data)); 3218 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs)); 3219 3220 maxneigs = 0; 3221 cum = cumarray = 0; 3222 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3223 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3224 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3225 const PetscInt *idxs; 3226 3227 PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs)); 3228 for (cum=0;cum<nv;cum++) { 3229 pcbddc->adaptive_constraints_n[cum] = 1; 3230 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3231 pcbddc->adaptive_constraints_data[cum] = 1.0; 3232 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3233 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3234 } 3235 PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs)); 3236 } 3237 3238 if (mss) { /* multilevel */ 3239 if (sub_schurs->gdsw) { 3240 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray)); 3241 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3242 } else { 3243 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3244 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3245 } 3246 } 3247 3248 lthresh = pcbddc->adaptive_threshold[0]; 3249 uthresh = pcbddc->adaptive_threshold[1]; 3250 upart = pcbddc->use_deluxe_scaling; 3251 for (i=0;i<sub_schurs->n_subs;i++) { 3252 const PetscInt *idxs; 3253 PetscReal upper,lower; 3254 PetscInt j,subset_size,eigs_start = 0; 3255 PetscBLASInt B_N; 3256 PetscBool same_data = PETSC_FALSE; 3257 PetscBool scal = PETSC_FALSE; 3258 3259 if (upart) { 3260 upper = PETSC_MAX_REAL; 3261 lower = uthresh; 3262 } else { 3263 if (sub_schurs->gdsw) { 3264 upper = uthresh; 3265 lower = PETSC_MIN_REAL; 3266 } else { 3267 PetscCheck(sub_schurs->is_posdef,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3268 upper = 1./uthresh; 3269 lower = 0.; 3270 } 3271 } 3272 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3273 PetscCall(ISGetIndices(sub_schurs->is_subs[i],&idxs)); 3274 PetscCall(PetscBLASIntCast(subset_size,&B_N)); 3275 /* this is experimental: we assume the dofs have been properly grouped to have 3276 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3277 if (!sub_schurs->is_posdef) { 3278 Mat T; 3279 3280 for (j=0;j<subset_size;j++) { 3281 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3282 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T)); 3283 PetscCall(MatScale(T,-1.0)); 3284 PetscCall(MatDestroy(&T)); 3285 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T)); 3286 PetscCall(MatScale(T,-1.0)); 3287 PetscCall(MatDestroy(&T)); 3288 if (sub_schurs->change_primal_sub) { 3289 PetscInt nz,k; 3290 const PetscInt *idxs; 3291 3292 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz)); 3293 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i],&idxs)); 3294 for (k=0;k<nz;k++) { 3295 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3296 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3297 } 3298 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs)); 3299 } 3300 scal = PETSC_TRUE; 3301 break; 3302 } 3303 } 3304 } 3305 3306 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3307 if (sub_schurs->is_symmetric) { 3308 PetscInt j,k; 3309 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3310 PetscCall(PetscArrayzero(S,subset_size*subset_size)); 3311 PetscCall(PetscArrayzero(St,subset_size*subset_size)); 3312 } 3313 for (j=0;j<subset_size;j++) { 3314 for (k=j;k<subset_size;k++) { 3315 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3316 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3317 } 3318 } 3319 } else { 3320 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3321 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3322 } 3323 } else { 3324 S = Sarray + cumarray; 3325 St = Starray + cumarray; 3326 } 3327 /* see if we can save some work */ 3328 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3329 PetscCall(PetscArraycmp(S,St,subset_size*subset_size,&same_data)); 3330 } 3331 3332 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3333 B_neigs = 0; 3334 } else { 3335 if (sub_schurs->is_symmetric) { 3336 PetscBLASInt B_itype = 1; 3337 PetscBLASInt B_IL, B_IU; 3338 PetscReal eps = -1.0; /* dlamch? */ 3339 PetscInt nmin_s; 3340 PetscBool compute_range; 3341 3342 B_neigs = 0; 3343 compute_range = (PetscBool)!same_data; 3344 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3345 3346 if (pcbddc->dbg_flag) { 3347 PetscInt nc = 0; 3348 3349 if (sub_schurs->change_primal_sub) { 3350 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc)); 3351 } 3352 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc)); 3353 } 3354 3355 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3356 if (compute_range) { 3357 3358 /* ask for eigenvalues larger than thresh */ 3359 if (sub_schurs->is_posdef) { 3360 #if defined(PETSC_USE_COMPLEX) 3361 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3362 #else 3363 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3364 #endif 3365 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3366 } else { /* no theory so far, but it works nicely */ 3367 PetscInt recipe = 0,recipe_m = 1; 3368 PetscReal bb[2]; 3369 3370 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL)); 3371 switch (recipe) { 3372 case 0: 3373 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3374 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3375 #if defined(PETSC_USE_COMPLEX) 3376 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3377 #else 3378 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3379 #endif 3380 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3381 break; 3382 case 1: 3383 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3384 #if defined(PETSC_USE_COMPLEX) 3385 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3386 #else 3387 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3388 #endif 3389 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3390 if (!scal) { 3391 PetscBLASInt B_neigs2 = 0; 3392 3393 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3394 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3395 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3396 #if defined(PETSC_USE_COMPLEX) 3397 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3398 #else 3399 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3400 #endif 3401 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3402 B_neigs += B_neigs2; 3403 } 3404 break; 3405 case 2: 3406 if (scal) { 3407 bb[0] = PETSC_MIN_REAL; 3408 bb[1] = 0; 3409 #if defined(PETSC_USE_COMPLEX) 3410 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3411 #else 3412 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3413 #endif 3414 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3415 } else { 3416 PetscBLASInt B_neigs2 = 0; 3417 PetscBool import = PETSC_FALSE; 3418 3419 lthresh = PetscMax(lthresh,0.0); 3420 if (lthresh > 0.0) { 3421 bb[0] = PETSC_MIN_REAL; 3422 bb[1] = lthresh*lthresh; 3423 3424 import = PETSC_TRUE; 3425 #if defined(PETSC_USE_COMPLEX) 3426 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3427 #else 3428 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3429 #endif 3430 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3431 } 3432 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3433 bb[1] = PETSC_MAX_REAL; 3434 if (import) { 3435 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3436 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3437 } 3438 #if defined(PETSC_USE_COMPLEX) 3439 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3440 #else 3441 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3442 #endif 3443 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3444 B_neigs += B_neigs2; 3445 } 3446 break; 3447 case 3: 3448 if (scal) { 3449 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL)); 3450 } else { 3451 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL)); 3452 } 3453 if (!scal) { 3454 bb[0] = uthresh; 3455 bb[1] = PETSC_MAX_REAL; 3456 #if defined(PETSC_USE_COMPLEX) 3457 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3458 #else 3459 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3460 #endif 3461 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3462 } 3463 if (recipe_m > 0 && B_N - B_neigs > 0) { 3464 PetscBLASInt B_neigs2 = 0; 3465 3466 B_IL = 1; 3467 PetscCall(PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU)); 3468 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3469 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3470 #if defined(PETSC_USE_COMPLEX) 3471 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3472 #else 3473 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3474 #endif 3475 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3476 B_neigs += B_neigs2; 3477 } 3478 break; 3479 case 4: 3480 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3481 #if defined(PETSC_USE_COMPLEX) 3482 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3483 #else 3484 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3485 #endif 3486 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3487 { 3488 PetscBLASInt B_neigs2 = 0; 3489 3490 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3491 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3492 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3493 #if defined(PETSC_USE_COMPLEX) 3494 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3495 #else 3496 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3497 #endif 3498 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3499 B_neigs += B_neigs2; 3500 } 3501 break; 3502 case 5: /* same as before: first compute all eigenvalues, then filter */ 3503 #if defined(PETSC_USE_COMPLEX) 3504 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3505 #else 3506 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3507 #endif 3508 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3509 { 3510 PetscInt e,k,ne; 3511 for (e=0,ne=0;e<B_neigs;e++) { 3512 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3513 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3514 eigs[ne] = eigs[e]; 3515 ne++; 3516 } 3517 } 3518 PetscCall(PetscArraycpy(eigv,S,B_N*ne)); 3519 B_neigs = ne; 3520 } 3521 break; 3522 default: 3523 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %" PetscInt_FMT,recipe); 3524 } 3525 } 3526 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3527 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3528 B_IL = 1; 3529 #if defined(PETSC_USE_COMPLEX) 3530 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3531 #else 3532 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3533 #endif 3534 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3535 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3536 PetscInt k; 3537 PetscCheck(sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3538 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax)); 3539 PetscCall(PetscBLASIntCast(nmax,&B_neigs)); 3540 nmin = nmax; 3541 PetscCall(PetscArrayzero(eigv,subset_size*nmax)); 3542 for (k=0;k<nmax;k++) { 3543 eigs[k] = 1./PETSC_SMALL; 3544 eigv[k*(subset_size+1)] = 1.0; 3545 } 3546 } 3547 PetscCall(PetscFPTrapPop()); 3548 if (B_ierr) { 3549 PetscCheck(B_ierr >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT,-B_ierr); 3550 PetscCheck(B_ierr > B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge",B_ierr); 3551 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite",B_ierr-B_N-1); 3552 } 3553 3554 if (B_neigs > nmax) { 3555 if (pcbddc->dbg_flag) { 3556 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n",B_neigs,nmax)); 3557 } 3558 if (upart) eigs_start = scal ? 0 : B_neigs-nmax; 3559 B_neigs = nmax; 3560 } 3561 3562 nmin_s = PetscMin(nmin,B_N); 3563 if (B_neigs < nmin_s) { 3564 PetscBLASInt B_neigs2 = 0; 3565 3566 if (upart) { 3567 if (scal) { 3568 B_IU = nmin_s; 3569 B_IL = B_neigs + 1; 3570 } else { 3571 B_IL = B_N - nmin_s + 1; 3572 B_IU = B_N - B_neigs; 3573 } 3574 } else { 3575 B_IL = B_neigs + 1; 3576 B_IU = nmin_s; 3577 } 3578 if (pcbddc->dbg_flag) { 3579 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU)); 3580 } 3581 if (sub_schurs->is_symmetric) { 3582 PetscInt j,k; 3583 for (j=0;j<subset_size;j++) { 3584 for (k=j;k<subset_size;k++) { 3585 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3586 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3587 } 3588 } 3589 } else { 3590 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3591 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3592 } 3593 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3594 #if defined(PETSC_USE_COMPLEX) 3595 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3596 #else 3597 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3598 #endif 3599 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3600 PetscCall(PetscFPTrapPop()); 3601 B_neigs += B_neigs2; 3602 } 3603 if (B_ierr) { 3604 PetscCheck(B_ierr >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT,-B_ierr); 3605 PetscCheck(B_ierr > B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge",B_ierr); 3606 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite",B_ierr-B_N-1); 3607 } 3608 if (pcbddc->dbg_flag) { 3609 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %" PetscBLASInt_FMT " eigs\n",B_neigs)); 3610 for (j=0;j<B_neigs;j++) { 3611 if (!sub_schurs->gdsw) { 3612 if (eigs[j] == 0.0) { 3613 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n")); 3614 } else { 3615 if (upart) { 3616 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",(double)eigs[j+eigs_start])); 3617 } else { 3618 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",(double)(1./eigs[j+eigs_start]))); 3619 } 3620 } 3621 } else { 3622 double pg = (double)eigs[j+eigs_start]; 3623 if (pg < 2*PETSC_SMALL) pg = 0.0; 3624 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",pg)); 3625 } 3626 } 3627 } 3628 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3629 } 3630 /* change the basis back to the original one */ 3631 if (sub_schurs->change) { 3632 Mat change,phi,phit; 3633 3634 if (pcbddc->dbg_flag > 2) { 3635 PetscInt ii; 3636 for (ii=0;ii<B_neigs;ii++) { 3637 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n",ii,B_neigs,B_N)); 3638 for (j=0;j<B_N;j++) { 3639 #if defined(PETSC_USE_COMPLEX) 3640 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3641 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3642 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",(double)r,(double)c)); 3643 #else 3644 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",(double)(eigv[(ii+eigs_start)*subset_size+j]))); 3645 #endif 3646 } 3647 } 3648 } 3649 PetscCall(KSPGetOperators(sub_schurs->change[i],&change,NULL)); 3650 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit)); 3651 PetscCall(MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi)); 3652 PetscCall(MatCopy(phi,phit,SAME_NONZERO_PATTERN)); 3653 PetscCall(MatDestroy(&phit)); 3654 PetscCall(MatDestroy(&phi)); 3655 } 3656 maxneigs = PetscMax(B_neigs,maxneigs); 3657 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3658 if (B_neigs) { 3659 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size)); 3660 3661 if (pcbddc->dbg_flag > 1) { 3662 PetscInt ii; 3663 for (ii=0;ii<B_neigs;ii++) { 3664 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n",ii,B_neigs,B_N)); 3665 for (j=0;j<B_N;j++) { 3666 #if defined(PETSC_USE_COMPLEX) 3667 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3668 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3669 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",(double)r,(double)c)); 3670 #else 3671 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",(double)PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]))); 3672 #endif 3673 } 3674 } 3675 } 3676 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size)); 3677 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3678 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3679 cum++; 3680 } 3681 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i],&idxs)); 3682 /* shift for next computation */ 3683 cumarray += subset_size*subset_size; 3684 } 3685 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3686 3687 if (mss) { 3688 if (sub_schurs->gdsw) { 3689 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray)); 3690 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3691 } else { 3692 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3693 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3694 /* destroy matrices (junk) */ 3695 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3696 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3697 } 3698 } 3699 if (allocated_S_St) PetscCall(PetscFree2(S,St)); 3700 PetscCall(PetscFree5(eigv,eigs,work,B_iwork,B_ifail)); 3701 #if defined(PETSC_USE_COMPLEX) 3702 PetscCall(PetscFree(rwork)); 3703 #endif 3704 if (pcbddc->dbg_flag) { 3705 PetscInt maxneigs_r; 3706 PetscCall(MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc))); 3707 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %" PetscInt_FMT "\n",maxneigs_r)); 3708 } 3709 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3710 PetscFunctionReturn(0); 3711 } 3712 3713 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3714 { 3715 PetscScalar *coarse_submat_vals; 3716 3717 PetscFunctionBegin; 3718 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3719 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3720 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3721 3722 /* Setup local neumann solver ksp_R */ 3723 /* PCBDDCSetUpLocalScatters should be called first! */ 3724 PetscCall(PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE)); 3725 3726 /* 3727 Setup local correction and local part of coarse basis. 3728 Gives back the dense local part of the coarse matrix in column major ordering 3729 */ 3730 PetscCall(PCBDDCSetUpCorrection(pc,&coarse_submat_vals)); 3731 3732 /* Compute total number of coarse nodes and setup coarse solver */ 3733 PetscCall(PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals)); 3734 3735 /* free */ 3736 PetscCall(PetscFree(coarse_submat_vals)); 3737 PetscFunctionReturn(0); 3738 } 3739 3740 PetscErrorCode PCBDDCResetCustomization(PC pc) 3741 { 3742 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3743 3744 PetscFunctionBegin; 3745 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3746 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3747 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3748 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3749 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3750 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3751 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3752 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3753 PetscCall(PCBDDCSetDofsSplitting(pc,0,NULL)); 3754 PetscCall(PCBDDCSetDofsSplittingLocal(pc,0,NULL)); 3755 PetscFunctionReturn(0); 3756 } 3757 3758 PetscErrorCode PCBDDCResetTopography(PC pc) 3759 { 3760 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3761 PetscInt i; 3762 3763 PetscFunctionBegin; 3764 PetscCall(MatDestroy(&pcbddc->nedcG)); 3765 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3766 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3767 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3768 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3769 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3770 PetscCall(VecDestroy(&pcbddc->work_change)); 3771 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3772 PetscCall(MatDestroy(&pcbddc->divudotp)); 3773 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3774 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3775 for (i=0;i<pcbddc->n_local_subs;i++) { 3776 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3777 } 3778 pcbddc->n_local_subs = 0; 3779 PetscCall(PetscFree(pcbddc->local_subs)); 3780 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3781 pcbddc->graphanalyzed = PETSC_FALSE; 3782 pcbddc->recompute_topography = PETSC_TRUE; 3783 pcbddc->corner_selected = PETSC_FALSE; 3784 PetscFunctionReturn(0); 3785 } 3786 3787 PetscErrorCode PCBDDCResetSolvers(PC pc) 3788 { 3789 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3790 3791 PetscFunctionBegin; 3792 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3793 if (pcbddc->coarse_phi_B) { 3794 PetscScalar *array; 3795 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&array)); 3796 PetscCall(PetscFree(array)); 3797 } 3798 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3799 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3800 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3801 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3802 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3803 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3804 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3805 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3806 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3807 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3808 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3809 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3810 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3811 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3812 PetscCall(KSPReset(pcbddc->ksp_D)); 3813 PetscCall(KSPReset(pcbddc->ksp_R)); 3814 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3815 PetscCall(MatDestroy(&pcbddc->local_mat)); 3816 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3817 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 3818 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3819 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3820 PetscCall(MatDestroy(&pcbddc->benign_change)); 3821 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3822 PetscCall(PCBDDCBenignShellMat(pc,PETSC_TRUE)); 3823 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3824 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3825 if (pcbddc->benign_zerodiag_subs) { 3826 PetscInt i; 3827 for (i=0;i<pcbddc->benign_n;i++) { 3828 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3829 } 3830 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3831 } 3832 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 3833 PetscFunctionReturn(0); 3834 } 3835 3836 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3837 { 3838 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3839 PC_IS *pcis = (PC_IS*)pc->data; 3840 VecType impVecType; 3841 PetscInt n_constraints,n_R,old_size; 3842 3843 PetscFunctionBegin; 3844 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3845 n_R = pcis->n - pcbddc->n_vertices; 3846 PetscCall(VecGetType(pcis->vec1_N,&impVecType)); 3847 /* local work vectors (try to avoid unneeded work)*/ 3848 /* R nodes */ 3849 old_size = -1; 3850 if (pcbddc->vec1_R) { 3851 PetscCall(VecGetSize(pcbddc->vec1_R,&old_size)); 3852 } 3853 if (n_R != old_size) { 3854 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3855 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3856 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R)); 3857 PetscCall(VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R)); 3858 PetscCall(VecSetType(pcbddc->vec1_R,impVecType)); 3859 PetscCall(VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R)); 3860 } 3861 /* local primal dofs */ 3862 old_size = -1; 3863 if (pcbddc->vec1_P) { 3864 PetscCall(VecGetSize(pcbddc->vec1_P,&old_size)); 3865 } 3866 if (pcbddc->local_primal_size != old_size) { 3867 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3868 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P)); 3869 PetscCall(VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size)); 3870 PetscCall(VecSetType(pcbddc->vec1_P,impVecType)); 3871 } 3872 /* local explicit constraints */ 3873 old_size = -1; 3874 if (pcbddc->vec1_C) { 3875 PetscCall(VecGetSize(pcbddc->vec1_C,&old_size)); 3876 } 3877 if (n_constraints && n_constraints != old_size) { 3878 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3879 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C)); 3880 PetscCall(VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints)); 3881 PetscCall(VecSetType(pcbddc->vec1_C,impVecType)); 3882 } 3883 PetscFunctionReturn(0); 3884 } 3885 3886 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3887 { 3888 /* pointers to pcis and pcbddc */ 3889 PC_IS* pcis = (PC_IS*)pc->data; 3890 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3891 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3892 /* submatrices of local problem */ 3893 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3894 /* submatrices of local coarse problem */ 3895 Mat S_VV,S_CV,S_VC,S_CC; 3896 /* working matrices */ 3897 Mat C_CR; 3898 /* additional working stuff */ 3899 PC pc_R; 3900 Mat F,Brhs = NULL; 3901 Vec dummy_vec; 3902 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3903 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3904 PetscScalar *work; 3905 PetscInt *idx_V_B; 3906 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3907 PetscInt i,n_R,n_D,n_B; 3908 PetscScalar one=1.0,m_one=-1.0; 3909 3910 PetscFunctionBegin; 3911 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3912 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 3913 3914 /* Set Non-overlapping dimensions */ 3915 n_vertices = pcbddc->n_vertices; 3916 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3917 n_B = pcis->n_B; 3918 n_D = pcis->n - n_B; 3919 n_R = pcis->n - n_vertices; 3920 3921 /* vertices in boundary numbering */ 3922 PetscCall(PetscMalloc1(n_vertices,&idx_V_B)); 3923 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B)); 3924 PetscCheck(i == n_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT,n_vertices,i); 3925 3926 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3927 PetscCall(PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals)); 3928 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV)); 3929 PetscCall(MatDenseSetLDA(S_VV,pcbddc->local_primal_size)); 3930 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV)); 3931 PetscCall(MatDenseSetLDA(S_CV,pcbddc->local_primal_size)); 3932 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC)); 3933 PetscCall(MatDenseSetLDA(S_VC,pcbddc->local_primal_size)); 3934 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC)); 3935 PetscCall(MatDenseSetLDA(S_CC,pcbddc->local_primal_size)); 3936 3937 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3938 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_R)); 3939 PetscCall(PCSetUp(pc_R)); 3940 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU)); 3941 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL)); 3942 lda_rhs = n_R; 3943 need_benign_correction = PETSC_FALSE; 3944 if (isLU || isCHOL) { 3945 PetscCall(PCFactorGetMatrix(pc_R,&F)); 3946 } else if (sub_schurs && sub_schurs->reuse_solver) { 3947 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3948 MatFactorType type; 3949 3950 F = reuse_solver->F; 3951 PetscCall(MatGetFactorType(F,&type)); 3952 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3953 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3954 PetscCall(MatGetSize(F,&lda_rhs,NULL)); 3955 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3956 } else F = NULL; 3957 3958 /* determine if we can use a sparse right-hand side */ 3959 sparserhs = PETSC_FALSE; 3960 if (F) { 3961 MatSolverType solver; 3962 3963 PetscCall(MatFactorGetSolverType(F,&solver)); 3964 PetscCall(PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs)); 3965 } 3966 3967 /* allocate workspace */ 3968 n = 0; 3969 if (n_constraints) { 3970 n += lda_rhs*n_constraints; 3971 } 3972 if (n_vertices) { 3973 n = PetscMax(2*lda_rhs*n_vertices,n); 3974 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3975 } 3976 if (!pcbddc->symmetric_primal) { 3977 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3978 } 3979 PetscCall(PetscMalloc1(n,&work)); 3980 3981 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3982 dummy_vec = NULL; 3983 if (need_benign_correction && lda_rhs != n_R && F) { 3984 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec)); 3985 PetscCall(VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE)); 3986 PetscCall(VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name)); 3987 } 3988 3989 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3990 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3991 3992 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3993 if (n_constraints) { 3994 Mat M3,C_B; 3995 IS is_aux; 3996 3997 /* Extract constraints on R nodes: C_{CR} */ 3998 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux)); 3999 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR)); 4000 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 4001 4002 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4003 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4004 if (!sparserhs) { 4005 PetscCall(PetscArrayzero(work,lda_rhs*n_constraints)); 4006 for (i=0;i<n_constraints;i++) { 4007 const PetscScalar *row_cmat_values; 4008 const PetscInt *row_cmat_indices; 4009 PetscInt size_of_constraint,j; 4010 4011 PetscCall(MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 4012 for (j=0;j<size_of_constraint;j++) { 4013 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4014 } 4015 PetscCall(MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 4016 } 4017 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs)); 4018 } else { 4019 Mat tC_CR; 4020 4021 PetscCall(MatScale(C_CR,-1.0)); 4022 if (lda_rhs != n_R) { 4023 PetscScalar *aa; 4024 PetscInt r,*ii,*jj; 4025 PetscBool done; 4026 4027 PetscCall(MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4028 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4029 PetscCall(MatSeqAIJGetArray(C_CR,&aa)); 4030 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR)); 4031 PetscCall(MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4032 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4033 } else { 4034 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4035 tC_CR = C_CR; 4036 } 4037 PetscCall(MatCreateTranspose(tC_CR,&Brhs)); 4038 PetscCall(MatDestroy(&tC_CR)); 4039 } 4040 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R)); 4041 if (F) { 4042 if (need_benign_correction) { 4043 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4044 4045 /* rhs is already zero on interior dofs, no need to change the rhs */ 4046 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n)); 4047 } 4048 PetscCall(MatMatSolve(F,Brhs,local_auxmat2_R)); 4049 if (need_benign_correction) { 4050 PetscScalar *marr; 4051 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4052 4053 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4054 if (lda_rhs != n_R) { 4055 for (i=0;i<n_constraints;i++) { 4056 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4057 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4058 PetscCall(VecResetArray(dummy_vec)); 4059 } 4060 } else { 4061 for (i=0;i<n_constraints;i++) { 4062 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4063 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4064 PetscCall(VecResetArray(pcbddc->vec1_R)); 4065 } 4066 } 4067 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4068 } 4069 } else { 4070 PetscScalar *marr; 4071 4072 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4073 for (i=0;i<n_constraints;i++) { 4074 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4075 PetscCall(VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs)); 4076 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4077 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4078 PetscCall(VecResetArray(pcbddc->vec1_R)); 4079 PetscCall(VecResetArray(pcbddc->vec2_R)); 4080 } 4081 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4082 } 4083 if (sparserhs) PetscCall(MatScale(C_CR,-1.0)); 4084 PetscCall(MatDestroy(&Brhs)); 4085 if (!pcbddc->switch_static) { 4086 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2)); 4087 for (i=0;i<n_constraints;i++) { 4088 Vec r, b; 4089 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R,i,&r)); 4090 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2,i,&b)); 4091 PetscCall(VecScatterBegin(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4092 PetscCall(VecScatterEnd(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4093 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2,i,&b)); 4094 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R,i,&r)); 4095 } 4096 PetscCall(MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4097 } else { 4098 if (lda_rhs != n_R) { 4099 IS dummy; 4100 4101 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy)); 4102 PetscCall(MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2)); 4103 PetscCall(ISDestroy(&dummy)); 4104 } else { 4105 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4106 pcbddc->local_auxmat2 = local_auxmat2_R; 4107 } 4108 PetscCall(MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4109 } 4110 PetscCall(ISDestroy(&is_aux)); 4111 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4112 PetscCall(MatScale(M3,m_one)); 4113 if (isCHOL) { 4114 PetscCall(MatCholeskyFactor(M3,NULL,NULL)); 4115 } else { 4116 PetscCall(MatLUFactor(M3,NULL,NULL,NULL)); 4117 } 4118 PetscCall(MatSeqDenseInvertFactors_Private(M3)); 4119 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4120 PetscCall(MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1)); 4121 PetscCall(MatDestroy(&C_B)); 4122 PetscCall(MatCopy(M3,S_CC,SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4123 PetscCall(MatDestroy(&M3)); 4124 } 4125 4126 /* Get submatrices from subdomain matrix */ 4127 if (n_vertices) { 4128 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4129 PetscBool oldpin; 4130 #endif 4131 PetscBool isaij; 4132 IS is_aux; 4133 4134 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4135 IS tis; 4136 4137 PetscCall(ISDuplicate(pcbddc->is_R_local,&tis)); 4138 PetscCall(ISSort(tis)); 4139 PetscCall(ISComplement(tis,0,pcis->n,&is_aux)); 4140 PetscCall(ISDestroy(&tis)); 4141 } else { 4142 PetscCall(ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux)); 4143 } 4144 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4145 oldpin = pcbddc->local_mat->boundtocpu; 4146 #endif 4147 PetscCall(MatBindToCPU(pcbddc->local_mat,PETSC_TRUE)); 4148 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV)); 4149 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR)); 4150 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij)); 4151 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4152 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4153 } 4154 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV)); 4155 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4156 PetscCall(MatBindToCPU(pcbddc->local_mat,oldpin)); 4157 #endif 4158 PetscCall(ISDestroy(&is_aux)); 4159 } 4160 4161 /* Matrix of coarse basis functions (local) */ 4162 if (pcbddc->coarse_phi_B) { 4163 PetscInt on_B,on_primal,on_D=n_D; 4164 if (pcbddc->coarse_phi_D) { 4165 PetscCall(MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL)); 4166 } 4167 PetscCall(MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal)); 4168 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4169 PetscScalar *marray; 4170 4171 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&marray)); 4172 PetscCall(PetscFree(marray)); 4173 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4174 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4175 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4176 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4177 } 4178 } 4179 4180 if (!pcbddc->coarse_phi_B) { 4181 PetscScalar *marr; 4182 4183 /* memory size */ 4184 n = n_B*pcbddc->local_primal_size; 4185 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4186 if (!pcbddc->symmetric_primal) n *= 2; 4187 PetscCall(PetscCalloc1(n,&marr)); 4188 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B)); 4189 marr += n_B*pcbddc->local_primal_size; 4190 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4191 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D)); 4192 marr += n_D*pcbddc->local_primal_size; 4193 } 4194 if (!pcbddc->symmetric_primal) { 4195 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B)); 4196 marr += n_B*pcbddc->local_primal_size; 4197 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4198 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D)); 4199 } 4200 } else { 4201 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4202 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4203 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4204 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4205 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4206 } 4207 } 4208 } 4209 4210 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4211 p0_lidx_I = NULL; 4212 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4213 const PetscInt *idxs; 4214 4215 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 4216 PetscCall(PetscMalloc1(pcbddc->benign_n,&p0_lidx_I)); 4217 for (i=0;i<pcbddc->benign_n;i++) { 4218 PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i])); 4219 } 4220 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 4221 } 4222 4223 /* vertices */ 4224 if (n_vertices) { 4225 PetscBool restoreavr = PETSC_FALSE; 4226 4227 PetscCall(MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV)); 4228 4229 if (n_R) { 4230 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4231 PetscBLASInt B_N,B_one = 1; 4232 const PetscScalar *x; 4233 PetscScalar *y; 4234 4235 PetscCall(MatScale(A_RV,m_one)); 4236 if (need_benign_correction) { 4237 ISLocalToGlobalMapping RtoN; 4238 IS is_p0; 4239 PetscInt *idxs_p0,n; 4240 4241 PetscCall(PetscMalloc1(pcbddc->benign_n,&idxs_p0)); 4242 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN)); 4243 PetscCall(ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0)); 4244 PetscCheck(n == pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT,n,pcbddc->benign_n); 4245 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4246 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0)); 4247 PetscCall(MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr)); 4248 PetscCall(ISDestroy(&is_p0)); 4249 } 4250 4251 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV)); 4252 if (!sparserhs || need_benign_correction) { 4253 if (lda_rhs == n_R) { 4254 PetscCall(MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV)); 4255 } else { 4256 PetscScalar *av,*array; 4257 const PetscInt *xadj,*adjncy; 4258 PetscInt n; 4259 PetscBool flg_row; 4260 4261 array = work+lda_rhs*n_vertices; 4262 PetscCall(PetscArrayzero(array,lda_rhs*n_vertices)); 4263 PetscCall(MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV)); 4264 PetscCall(MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4265 PetscCall(MatSeqAIJGetArray(A_RV,&av)); 4266 for (i=0;i<n;i++) { 4267 PetscInt j; 4268 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4269 } 4270 PetscCall(MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4271 PetscCall(MatDestroy(&A_RV)); 4272 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV)); 4273 } 4274 if (need_benign_correction) { 4275 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4276 PetscScalar *marr; 4277 4278 PetscCall(MatDenseGetArray(A_RV,&marr)); 4279 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4280 4281 | 0 0 0 | (V) 4282 L = | 0 0 -1 | (P-p0) 4283 | 0 0 -1 | (p0) 4284 4285 */ 4286 for (i=0;i<reuse_solver->benign_n;i++) { 4287 const PetscScalar *vals; 4288 const PetscInt *idxs,*idxs_zero; 4289 PetscInt n,j,nz; 4290 4291 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4292 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4293 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4294 for (j=0;j<n;j++) { 4295 PetscScalar val = vals[j]; 4296 PetscInt k,col = idxs[j]; 4297 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4298 } 4299 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4300 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4301 } 4302 PetscCall(MatDenseRestoreArray(A_RV,&marr)); 4303 } 4304 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4305 Brhs = A_RV; 4306 } else { 4307 Mat tA_RVT,A_RVT; 4308 4309 if (!pcbddc->symmetric_primal) { 4310 /* A_RV already scaled by -1 */ 4311 PetscCall(MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT)); 4312 } else { 4313 restoreavr = PETSC_TRUE; 4314 PetscCall(MatScale(A_VR,-1.0)); 4315 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4316 A_RVT = A_VR; 4317 } 4318 if (lda_rhs != n_R) { 4319 PetscScalar *aa; 4320 PetscInt r,*ii,*jj; 4321 PetscBool done; 4322 4323 PetscCall(MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4324 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4325 PetscCall(MatSeqAIJGetArray(A_RVT,&aa)); 4326 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT)); 4327 PetscCall(MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4328 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4329 } else { 4330 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4331 tA_RVT = A_RVT; 4332 } 4333 PetscCall(MatCreateTranspose(tA_RVT,&Brhs)); 4334 PetscCall(MatDestroy(&tA_RVT)); 4335 PetscCall(MatDestroy(&A_RVT)); 4336 } 4337 if (F) { 4338 /* need to correct the rhs */ 4339 if (need_benign_correction) { 4340 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4341 PetscScalar *marr; 4342 4343 PetscCall(MatDenseGetArray(Brhs,&marr)); 4344 if (lda_rhs != n_R) { 4345 for (i=0;i<n_vertices;i++) { 4346 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4347 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE)); 4348 PetscCall(VecResetArray(dummy_vec)); 4349 } 4350 } else { 4351 for (i=0;i<n_vertices;i++) { 4352 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4353 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE)); 4354 PetscCall(VecResetArray(pcbddc->vec1_R)); 4355 } 4356 } 4357 PetscCall(MatDenseRestoreArray(Brhs,&marr)); 4358 } 4359 PetscCall(MatMatSolve(F,Brhs,A_RRmA_RV)); 4360 if (restoreavr) PetscCall(MatScale(A_VR,-1.0)); 4361 /* need to correct the solution */ 4362 if (need_benign_correction) { 4363 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4364 PetscScalar *marr; 4365 4366 PetscCall(MatDenseGetArray(A_RRmA_RV,&marr)); 4367 if (lda_rhs != n_R) { 4368 for (i=0;i<n_vertices;i++) { 4369 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4370 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4371 PetscCall(VecResetArray(dummy_vec)); 4372 } 4373 } else { 4374 for (i=0;i<n_vertices;i++) { 4375 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4376 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4377 PetscCall(VecResetArray(pcbddc->vec1_R)); 4378 } 4379 } 4380 PetscCall(MatDenseRestoreArray(A_RRmA_RV,&marr)); 4381 } 4382 } else { 4383 PetscCall(MatDenseGetArray(Brhs,&y)); 4384 for (i=0;i<n_vertices;i++) { 4385 PetscCall(VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs)); 4386 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs)); 4387 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4388 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4389 PetscCall(VecResetArray(pcbddc->vec1_R)); 4390 PetscCall(VecResetArray(pcbddc->vec2_R)); 4391 } 4392 PetscCall(MatDenseRestoreArray(Brhs,&y)); 4393 } 4394 PetscCall(MatDestroy(&A_RV)); 4395 PetscCall(MatDestroy(&Brhs)); 4396 /* S_VV and S_CV */ 4397 if (n_constraints) { 4398 Mat B; 4399 4400 PetscCall(PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices)); 4401 for (i=0;i<n_vertices;i++) { 4402 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4403 PetscCall(VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B)); 4404 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4405 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4406 PetscCall(VecResetArray(pcis->vec1_B)); 4407 PetscCall(VecResetArray(pcbddc->vec1_R)); 4408 } 4409 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B)); 4410 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4411 PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV)); 4412 PetscCall(MatProductSetType(S_CV,MATPRODUCT_AB)); 4413 PetscCall(MatProductSetFromOptions(S_CV)); 4414 PetscCall(MatProductSymbolic(S_CV)); 4415 PetscCall(MatProductNumeric(S_CV)); 4416 PetscCall(MatProductClear(S_CV)); 4417 4418 PetscCall(MatDestroy(&B)); 4419 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B)); 4420 /* Reuse B = local_auxmat2_R * S_CV */ 4421 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B)); 4422 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4423 PetscCall(MatProductSetFromOptions(B)); 4424 PetscCall(MatProductSymbolic(B)); 4425 PetscCall(MatProductNumeric(B)); 4426 4427 PetscCall(MatScale(S_CV,m_one)); 4428 PetscCall(PetscBLASIntCast(lda_rhs*n_vertices,&B_N)); 4429 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4430 PetscCall(MatDestroy(&B)); 4431 } 4432 if (lda_rhs != n_R) { 4433 PetscCall(MatDestroy(&A_RRmA_RV)); 4434 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV)); 4435 PetscCall(MatDenseSetLDA(A_RRmA_RV,lda_rhs)); 4436 } 4437 PetscCall(MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt)); 4438 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4439 if (need_benign_correction) { 4440 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4441 PetscScalar *marr,*sums; 4442 4443 PetscCall(PetscMalloc1(n_vertices,&sums)); 4444 PetscCall(MatDenseGetArray(S_VVt,&marr)); 4445 for (i=0;i<reuse_solver->benign_n;i++) { 4446 const PetscScalar *vals; 4447 const PetscInt *idxs,*idxs_zero; 4448 PetscInt n,j,nz; 4449 4450 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4451 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4452 for (j=0;j<n_vertices;j++) { 4453 PetscInt k; 4454 sums[j] = 0.; 4455 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4456 } 4457 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4458 for (j=0;j<n;j++) { 4459 PetscScalar val = vals[j]; 4460 PetscInt k; 4461 for (k=0;k<n_vertices;k++) { 4462 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4463 } 4464 } 4465 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4466 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4467 } 4468 PetscCall(PetscFree(sums)); 4469 PetscCall(MatDenseRestoreArray(S_VVt,&marr)); 4470 PetscCall(MatDestroy(&A_RV_bcorr)); 4471 } 4472 PetscCall(MatDestroy(&A_RRmA_RV)); 4473 PetscCall(PetscBLASIntCast(n_vertices*n_vertices,&B_N)); 4474 PetscCall(MatDenseGetArrayRead(A_VV,&x)); 4475 PetscCall(MatDenseGetArray(S_VVt,&y)); 4476 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4477 PetscCall(MatDenseRestoreArrayRead(A_VV,&x)); 4478 PetscCall(MatDenseRestoreArray(S_VVt,&y)); 4479 PetscCall(MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN)); 4480 PetscCall(MatDestroy(&S_VVt)); 4481 } else { 4482 PetscCall(MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN)); 4483 } 4484 PetscCall(MatDestroy(&A_VV)); 4485 4486 /* coarse basis functions */ 4487 for (i=0;i<n_vertices;i++) { 4488 Vec v; 4489 PetscScalar one = 1.0,zero = 0.0; 4490 4491 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4492 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i,&v)); 4493 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4494 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4495 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4496 PetscMPIInt rank; 4497 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),&rank)); 4498 PetscCheck(rank <= 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4499 } 4500 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4501 PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4502 PetscCall(VecAssemblyEnd(v)); 4503 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i,&v)); 4504 4505 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4506 PetscInt j; 4507 4508 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i,&v)); 4509 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4510 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4511 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4512 PetscMPIInt rank; 4513 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),&rank)); 4514 PetscCheck(rank <= 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4515 } 4516 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4517 PetscCall(VecAssemblyBegin(v)); 4518 PetscCall(VecAssemblyEnd(v)); 4519 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i,&v)); 4520 } 4521 PetscCall(VecResetArray(pcbddc->vec1_R)); 4522 } 4523 /* if n_R == 0 the object is not destroyed */ 4524 PetscCall(MatDestroy(&A_RV)); 4525 } 4526 PetscCall(VecDestroy(&dummy_vec)); 4527 4528 if (n_constraints) { 4529 Mat B; 4530 4531 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B)); 4532 PetscCall(MatScale(S_CC,m_one)); 4533 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B)); 4534 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4535 PetscCall(MatProductSetFromOptions(B)); 4536 PetscCall(MatProductSymbolic(B)); 4537 PetscCall(MatProductNumeric(B)); 4538 4539 PetscCall(MatScale(S_CC,m_one)); 4540 if (n_vertices) { 4541 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4542 PetscCall(MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC)); 4543 } else { 4544 Mat S_VCt; 4545 4546 if (lda_rhs != n_R) { 4547 PetscCall(MatDestroy(&B)); 4548 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B)); 4549 PetscCall(MatDenseSetLDA(B,lda_rhs)); 4550 } 4551 PetscCall(MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt)); 4552 PetscCall(MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN)); 4553 PetscCall(MatDestroy(&S_VCt)); 4554 } 4555 } 4556 PetscCall(MatDestroy(&B)); 4557 /* coarse basis functions */ 4558 for (i=0;i<n_constraints;i++) { 4559 Vec v; 4560 4561 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4562 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4563 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4564 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4565 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4566 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4567 PetscInt j; 4568 PetscScalar zero = 0.0; 4569 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4570 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4571 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4572 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4573 PetscCall(VecAssemblyBegin(v)); 4574 PetscCall(VecAssemblyEnd(v)); 4575 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4576 } 4577 PetscCall(VecResetArray(pcbddc->vec1_R)); 4578 } 4579 } 4580 if (n_constraints) { 4581 PetscCall(MatDestroy(&local_auxmat2_R)); 4582 } 4583 PetscCall(PetscFree(p0_lidx_I)); 4584 4585 /* coarse matrix entries relative to B_0 */ 4586 if (pcbddc->benign_n) { 4587 Mat B0_B,B0_BPHI; 4588 IS is_dummy; 4589 const PetscScalar *data; 4590 PetscInt j; 4591 4592 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4593 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4594 PetscCall(ISDestroy(&is_dummy)); 4595 PetscCall(MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4596 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4597 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data)); 4598 for (j=0;j<pcbddc->benign_n;j++) { 4599 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4600 for (i=0;i<pcbddc->local_primal_size;i++) { 4601 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4602 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4603 } 4604 } 4605 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data)); 4606 PetscCall(MatDestroy(&B0_B)); 4607 PetscCall(MatDestroy(&B0_BPHI)); 4608 } 4609 4610 /* compute other basis functions for non-symmetric problems */ 4611 if (!pcbddc->symmetric_primal) { 4612 Mat B_V=NULL,B_C=NULL; 4613 PetscScalar *marray; 4614 4615 if (n_constraints) { 4616 Mat S_CCT,C_CRT; 4617 4618 PetscCall(MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT)); 4619 PetscCall(MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT)); 4620 PetscCall(MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C)); 4621 PetscCall(MatDestroy(&S_CCT)); 4622 if (n_vertices) { 4623 Mat S_VCT; 4624 4625 PetscCall(MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT)); 4626 PetscCall(MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V)); 4627 PetscCall(MatDestroy(&S_VCT)); 4628 } 4629 PetscCall(MatDestroy(&C_CRT)); 4630 } else { 4631 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V)); 4632 } 4633 if (n_vertices && n_R) { 4634 PetscScalar *av,*marray; 4635 const PetscInt *xadj,*adjncy; 4636 PetscInt n; 4637 PetscBool flg_row; 4638 4639 /* B_V = B_V - A_VR^T */ 4640 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4641 PetscCall(MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4642 PetscCall(MatSeqAIJGetArray(A_VR,&av)); 4643 PetscCall(MatDenseGetArray(B_V,&marray)); 4644 for (i=0;i<n;i++) { 4645 PetscInt j; 4646 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4647 } 4648 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4649 PetscCall(MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4650 PetscCall(MatDestroy(&A_VR)); 4651 } 4652 4653 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4654 if (n_vertices) { 4655 PetscCall(MatDenseGetArray(B_V,&marray)); 4656 for (i=0;i<n_vertices;i++) { 4657 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+i*n_R)); 4658 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4659 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4660 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4661 PetscCall(VecResetArray(pcbddc->vec1_R)); 4662 PetscCall(VecResetArray(pcbddc->vec2_R)); 4663 } 4664 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4665 } 4666 if (B_C) { 4667 PetscCall(MatDenseGetArray(B_C,&marray)); 4668 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4669 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R)); 4670 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4671 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4672 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4673 PetscCall(VecResetArray(pcbddc->vec1_R)); 4674 PetscCall(VecResetArray(pcbddc->vec2_R)); 4675 } 4676 PetscCall(MatDenseRestoreArray(B_C,&marray)); 4677 } 4678 /* coarse basis functions */ 4679 for (i=0;i<pcbddc->local_primal_size;i++) { 4680 Vec v; 4681 4682 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*n_R)); 4683 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B,i,&v)); 4684 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4685 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4686 if (i<n_vertices) { 4687 PetscScalar one = 1.0; 4688 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4689 PetscCall(VecAssemblyBegin(v)); 4690 PetscCall(VecAssemblyEnd(v)); 4691 } 4692 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B,i,&v)); 4693 4694 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4695 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D,i,&v)); 4696 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4697 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4698 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D,i,&v)); 4699 } 4700 PetscCall(VecResetArray(pcbddc->vec1_R)); 4701 } 4702 PetscCall(MatDestroy(&B_V)); 4703 PetscCall(MatDestroy(&B_C)); 4704 } 4705 4706 /* free memory */ 4707 PetscCall(PetscFree(idx_V_B)); 4708 PetscCall(MatDestroy(&S_VV)); 4709 PetscCall(MatDestroy(&S_CV)); 4710 PetscCall(MatDestroy(&S_VC)); 4711 PetscCall(MatDestroy(&S_CC)); 4712 PetscCall(PetscFree(work)); 4713 if (n_vertices) { 4714 PetscCall(MatDestroy(&A_VR)); 4715 } 4716 if (n_constraints) { 4717 PetscCall(MatDestroy(&C_CR)); 4718 } 4719 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 4720 4721 /* Checking coarse_sub_mat and coarse basis functios */ 4722 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4723 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4724 if (pcbddc->dbg_flag) { 4725 Mat coarse_sub_mat; 4726 Mat AUXMAT,TM1,TM2,TM3,TM4; 4727 Mat coarse_phi_D,coarse_phi_B; 4728 Mat coarse_psi_D,coarse_psi_B; 4729 Mat A_II,A_BB,A_IB,A_BI; 4730 Mat C_B,CPHI; 4731 IS is_dummy; 4732 Vec mones; 4733 MatType checkmattype=MATSEQAIJ; 4734 PetscReal real_value; 4735 4736 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4737 Mat A; 4738 PetscCall(PCBDDCBenignProject(pc,NULL,NULL,&A)); 4739 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II)); 4740 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB)); 4741 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI)); 4742 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB)); 4743 PetscCall(MatDestroy(&A)); 4744 } else { 4745 PetscCall(MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II)); 4746 PetscCall(MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB)); 4747 PetscCall(MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI)); 4748 PetscCall(MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB)); 4749 } 4750 PetscCall(MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D)); 4751 PetscCall(MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B)); 4752 if (!pcbddc->symmetric_primal) { 4753 PetscCall(MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D)); 4754 PetscCall(MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B)); 4755 } 4756 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat)); 4757 4758 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 4759 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal)); 4760 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4761 if (!pcbddc->symmetric_primal) { 4762 PetscCall(MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4763 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1)); 4764 PetscCall(MatDestroy(&AUXMAT)); 4765 PetscCall(MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4766 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2)); 4767 PetscCall(MatDestroy(&AUXMAT)); 4768 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4769 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4770 PetscCall(MatDestroy(&AUXMAT)); 4771 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4772 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4773 PetscCall(MatDestroy(&AUXMAT)); 4774 } else { 4775 PetscCall(MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1)); 4776 PetscCall(MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2)); 4777 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4778 PetscCall(MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4779 PetscCall(MatDestroy(&AUXMAT)); 4780 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4781 PetscCall(MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4782 PetscCall(MatDestroy(&AUXMAT)); 4783 } 4784 PetscCall(MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN)); 4785 PetscCall(MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN)); 4786 PetscCall(MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN)); 4787 PetscCall(MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1)); 4788 if (pcbddc->benign_n) { 4789 Mat B0_B,B0_BPHI; 4790 const PetscScalar *data2; 4791 PetscScalar *data; 4792 PetscInt j; 4793 4794 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4795 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4796 PetscCall(MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4797 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4798 PetscCall(MatDenseGetArray(TM1,&data)); 4799 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data2)); 4800 for (j=0;j<pcbddc->benign_n;j++) { 4801 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4802 for (i=0;i<pcbddc->local_primal_size;i++) { 4803 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4804 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4805 } 4806 } 4807 PetscCall(MatDenseRestoreArray(TM1,&data)); 4808 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data2)); 4809 PetscCall(MatDestroy(&B0_B)); 4810 PetscCall(ISDestroy(&is_dummy)); 4811 PetscCall(MatDestroy(&B0_BPHI)); 4812 } 4813 #if 0 4814 { 4815 PetscViewer viewer; 4816 char filename[256]; 4817 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4818 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4819 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4820 PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4821 PetscCall(MatView(coarse_sub_mat,viewer)); 4822 PetscCall(PetscObjectSetName((PetscObject)TM1,"projected")); 4823 PetscCall(MatView(TM1,viewer)); 4824 if (pcbddc->coarse_phi_B) { 4825 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4826 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 4827 } 4828 if (pcbddc->coarse_phi_D) { 4829 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4830 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 4831 } 4832 if (pcbddc->coarse_psi_B) { 4833 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4834 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 4835 } 4836 if (pcbddc->coarse_psi_D) { 4837 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4838 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 4839 } 4840 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4841 PetscCall(MatView(pcbddc->local_mat,viewer)); 4842 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4843 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 4844 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4845 PetscCall(ISView(pcis->is_I_local,viewer)); 4846 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4847 PetscCall(ISView(pcis->is_B_local,viewer)); 4848 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4849 PetscCall(ISView(pcbddc->is_R_local,viewer)); 4850 PetscCall(PetscViewerDestroy(&viewer)); 4851 } 4852 #endif 4853 PetscCall(MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN)); 4854 PetscCall(MatNorm(TM1,NORM_FROBENIUS,&real_value)); 4855 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4856 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4857 4858 /* check constraints */ 4859 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy)); 4860 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 4861 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4862 PetscCall(MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4863 } else { 4864 PetscScalar *data; 4865 Mat tmat; 4866 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&data)); 4867 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat)); 4868 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B,&data)); 4869 PetscCall(MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4870 PetscCall(MatDestroy(&tmat)); 4871 } 4872 PetscCall(MatCreateVecs(CPHI,&mones,NULL)); 4873 PetscCall(VecSet(mones,-1.0)); 4874 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4875 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4876 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4877 if (!pcbddc->symmetric_primal) { 4878 PetscCall(MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI)); 4879 PetscCall(VecSet(mones,-1.0)); 4880 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4881 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4882 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4883 } 4884 PetscCall(MatDestroy(&C_B)); 4885 PetscCall(MatDestroy(&CPHI)); 4886 PetscCall(ISDestroy(&is_dummy)); 4887 PetscCall(VecDestroy(&mones)); 4888 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4889 PetscCall(MatDestroy(&A_II)); 4890 PetscCall(MatDestroy(&A_BB)); 4891 PetscCall(MatDestroy(&A_IB)); 4892 PetscCall(MatDestroy(&A_BI)); 4893 PetscCall(MatDestroy(&TM1)); 4894 PetscCall(MatDestroy(&TM2)); 4895 PetscCall(MatDestroy(&TM3)); 4896 PetscCall(MatDestroy(&TM4)); 4897 PetscCall(MatDestroy(&coarse_phi_D)); 4898 PetscCall(MatDestroy(&coarse_phi_B)); 4899 if (!pcbddc->symmetric_primal) { 4900 PetscCall(MatDestroy(&coarse_psi_D)); 4901 PetscCall(MatDestroy(&coarse_psi_B)); 4902 } 4903 PetscCall(MatDestroy(&coarse_sub_mat)); 4904 } 4905 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4906 { 4907 PetscBool gpu; 4908 4909 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu)); 4910 if (gpu) { 4911 if (pcbddc->local_auxmat1) { 4912 PetscCall(MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1)); 4913 } 4914 if (pcbddc->local_auxmat2) { 4915 PetscCall(MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2)); 4916 } 4917 if (pcbddc->coarse_phi_B) { 4918 PetscCall(MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B)); 4919 } 4920 if (pcbddc->coarse_phi_D) { 4921 PetscCall(MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D)); 4922 } 4923 if (pcbddc->coarse_psi_B) { 4924 PetscCall(MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B)); 4925 } 4926 if (pcbddc->coarse_psi_D) { 4927 PetscCall(MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D)); 4928 } 4929 } 4930 } 4931 /* get back data */ 4932 *coarse_submat_vals_n = coarse_submat_vals; 4933 PetscFunctionReturn(0); 4934 } 4935 4936 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4937 { 4938 Mat *work_mat; 4939 IS isrow_s,iscol_s; 4940 PetscBool rsorted,csorted; 4941 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4942 4943 PetscFunctionBegin; 4944 PetscCall(ISSorted(isrow,&rsorted)); 4945 PetscCall(ISSorted(iscol,&csorted)); 4946 PetscCall(ISGetLocalSize(isrow,&rsize)); 4947 PetscCall(ISGetLocalSize(iscol,&csize)); 4948 4949 if (!rsorted) { 4950 const PetscInt *idxs; 4951 PetscInt *idxs_sorted,i; 4952 4953 PetscCall(PetscMalloc1(rsize,&idxs_perm_r)); 4954 PetscCall(PetscMalloc1(rsize,&idxs_sorted)); 4955 for (i=0;i<rsize;i++) { 4956 idxs_perm_r[i] = i; 4957 } 4958 PetscCall(ISGetIndices(isrow,&idxs)); 4959 PetscCall(PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r)); 4960 for (i=0;i<rsize;i++) { 4961 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4962 } 4963 PetscCall(ISRestoreIndices(isrow,&idxs)); 4964 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s)); 4965 } else { 4966 PetscCall(PetscObjectReference((PetscObject)isrow)); 4967 isrow_s = isrow; 4968 } 4969 4970 if (!csorted) { 4971 if (isrow == iscol) { 4972 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 4973 iscol_s = isrow_s; 4974 } else { 4975 const PetscInt *idxs; 4976 PetscInt *idxs_sorted,i; 4977 4978 PetscCall(PetscMalloc1(csize,&idxs_perm_c)); 4979 PetscCall(PetscMalloc1(csize,&idxs_sorted)); 4980 for (i=0;i<csize;i++) { 4981 idxs_perm_c[i] = i; 4982 } 4983 PetscCall(ISGetIndices(iscol,&idxs)); 4984 PetscCall(PetscSortIntWithPermutation(csize,idxs,idxs_perm_c)); 4985 for (i=0;i<csize;i++) { 4986 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4987 } 4988 PetscCall(ISRestoreIndices(iscol,&idxs)); 4989 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s)); 4990 } 4991 } else { 4992 PetscCall(PetscObjectReference((PetscObject)iscol)); 4993 iscol_s = iscol; 4994 } 4995 4996 PetscCall(MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat)); 4997 4998 if (!rsorted || !csorted) { 4999 Mat new_mat; 5000 IS is_perm_r,is_perm_c; 5001 5002 if (!rsorted) { 5003 PetscInt *idxs_r,i; 5004 PetscCall(PetscMalloc1(rsize,&idxs_r)); 5005 for (i=0;i<rsize;i++) { 5006 idxs_r[idxs_perm_r[i]] = i; 5007 } 5008 PetscCall(PetscFree(idxs_perm_r)); 5009 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r)); 5010 } else { 5011 PetscCall(ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r)); 5012 } 5013 PetscCall(ISSetPermutation(is_perm_r)); 5014 5015 if (!csorted) { 5016 if (isrow_s == iscol_s) { 5017 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5018 is_perm_c = is_perm_r; 5019 } else { 5020 PetscInt *idxs_c,i; 5021 PetscCheck(idxs_perm_c,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5022 PetscCall(PetscMalloc1(csize,&idxs_c)); 5023 for (i=0;i<csize;i++) { 5024 idxs_c[idxs_perm_c[i]] = i; 5025 } 5026 PetscCall(PetscFree(idxs_perm_c)); 5027 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c)); 5028 } 5029 } else { 5030 PetscCall(ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c)); 5031 } 5032 PetscCall(ISSetPermutation(is_perm_c)); 5033 5034 PetscCall(MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat)); 5035 PetscCall(MatDestroy(&work_mat[0])); 5036 work_mat[0] = new_mat; 5037 PetscCall(ISDestroy(&is_perm_r)); 5038 PetscCall(ISDestroy(&is_perm_c)); 5039 } 5040 5041 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5042 *B = work_mat[0]; 5043 PetscCall(MatDestroyMatrices(1,&work_mat)); 5044 PetscCall(ISDestroy(&isrow_s)); 5045 PetscCall(ISDestroy(&iscol_s)); 5046 PetscFunctionReturn(0); 5047 } 5048 5049 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5050 { 5051 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5052 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5053 Mat new_mat,lA; 5054 IS is_local,is_global; 5055 PetscInt local_size; 5056 PetscBool isseqaij; 5057 5058 PetscFunctionBegin; 5059 PetscCall(MatDestroy(&pcbddc->local_mat)); 5060 PetscCall(MatGetSize(matis->A,&local_size,NULL)); 5061 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local)); 5062 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping,is_local,&is_global)); 5063 PetscCall(ISDestroy(&is_local)); 5064 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat)); 5065 PetscCall(ISDestroy(&is_global)); 5066 5067 if (pcbddc->dbg_flag) { 5068 Vec x,x_change; 5069 PetscReal error; 5070 5071 PetscCall(MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change)); 5072 PetscCall(VecSetRandom(x,NULL)); 5073 PetscCall(MatMult(ChangeOfBasisMatrix,x,x_change)); 5074 PetscCall(VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5075 PetscCall(VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5076 PetscCall(MatMult(new_mat,matis->x,matis->y)); 5077 if (!pcbddc->change_interior) { 5078 const PetscScalar *x,*y,*v; 5079 PetscReal lerror = 0.; 5080 PetscInt i; 5081 5082 PetscCall(VecGetArrayRead(matis->x,&x)); 5083 PetscCall(VecGetArrayRead(matis->y,&y)); 5084 PetscCall(VecGetArrayRead(matis->counter,&v)); 5085 for (i=0;i<local_size;i++) 5086 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5087 lerror = PetscAbsScalar(x[i]-y[i]); 5088 PetscCall(VecRestoreArrayRead(matis->x,&x)); 5089 PetscCall(VecRestoreArrayRead(matis->y,&y)); 5090 PetscCall(VecRestoreArrayRead(matis->counter,&v)); 5091 PetscCall(MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)pc))); 5092 if (error > PETSC_SMALL) { 5093 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5094 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",(double)error); 5095 } else { 5096 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",(double)error); 5097 } 5098 } 5099 } 5100 PetscCall(VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5101 PetscCall(VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5102 PetscCall(VecAXPY(x,-1.0,x_change)); 5103 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 5104 if (error > PETSC_SMALL) { 5105 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5106 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",(double)error); 5107 } else { 5108 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",(double)error); 5109 } 5110 } 5111 PetscCall(VecDestroy(&x)); 5112 PetscCall(VecDestroy(&x_change)); 5113 } 5114 5115 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5116 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA)); 5117 5118 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5119 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij)); 5120 if (isseqaij) { 5121 PetscCall(MatDestroy(&pcbddc->local_mat)); 5122 PetscCall(MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5123 if (lA) { 5124 Mat work; 5125 PetscCall(MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5126 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5127 PetscCall(MatDestroy(&work)); 5128 } 5129 } else { 5130 Mat work_mat; 5131 5132 PetscCall(MatDestroy(&pcbddc->local_mat)); 5133 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5134 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5135 PetscCall(MatDestroy(&work_mat)); 5136 if (lA) { 5137 Mat work; 5138 PetscCall(MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5139 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5140 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5141 PetscCall(MatDestroy(&work)); 5142 } 5143 } 5144 if (matis->A->symmetric_set) { 5145 PetscCall(MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric)); 5146 #if !defined(PETSC_USE_COMPLEX) 5147 PetscCall(MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric)); 5148 #endif 5149 } 5150 PetscCall(MatDestroy(&new_mat)); 5151 PetscFunctionReturn(0); 5152 } 5153 5154 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5155 { 5156 PC_IS* pcis = (PC_IS*)(pc->data); 5157 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5158 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5159 PetscInt *idx_R_local=NULL; 5160 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5161 PetscInt vbs,bs; 5162 PetscBT bitmask=NULL; 5163 5164 PetscFunctionBegin; 5165 /* 5166 No need to setup local scatters if 5167 - primal space is unchanged 5168 AND 5169 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5170 AND 5171 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5172 */ 5173 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5174 PetscFunctionReturn(0); 5175 } 5176 /* destroy old objects */ 5177 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5178 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5179 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5180 /* Set Non-overlapping dimensions */ 5181 n_B = pcis->n_B; 5182 n_D = pcis->n - n_B; 5183 n_vertices = pcbddc->n_vertices; 5184 5185 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5186 5187 /* create auxiliary bitmask and allocate workspace */ 5188 if (!sub_schurs || !sub_schurs->reuse_solver) { 5189 PetscCall(PetscMalloc1(pcis->n-n_vertices,&idx_R_local)); 5190 PetscCall(PetscBTCreate(pcis->n,&bitmask)); 5191 for (i=0;i<n_vertices;i++) { 5192 PetscCall(PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i])); 5193 } 5194 5195 for (i=0, n_R=0; i<pcis->n; i++) { 5196 if (!PetscBTLookup(bitmask,i)) { 5197 idx_R_local[n_R++] = i; 5198 } 5199 } 5200 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5201 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5202 5203 PetscCall(ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5204 PetscCall(ISGetLocalSize(reuse_solver->is_R,&n_R)); 5205 } 5206 5207 /* Block code */ 5208 vbs = 1; 5209 PetscCall(MatGetBlockSize(pcbddc->local_mat,&bs)); 5210 if (bs>1 && !(n_vertices%bs)) { 5211 PetscBool is_blocked = PETSC_TRUE; 5212 PetscInt *vary; 5213 if (!sub_schurs || !sub_schurs->reuse_solver) { 5214 PetscCall(PetscMalloc1(pcis->n/bs,&vary)); 5215 PetscCall(PetscArrayzero(vary,pcis->n/bs)); 5216 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5217 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5218 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5219 for (i=0; i<pcis->n/bs; i++) { 5220 if (vary[i]!=0 && vary[i]!=bs) { 5221 is_blocked = PETSC_FALSE; 5222 break; 5223 } 5224 } 5225 PetscCall(PetscFree(vary)); 5226 } else { 5227 /* Verify directly the R set */ 5228 for (i=0; i<n_R/bs; i++) { 5229 PetscInt j,node=idx_R_local[bs*i]; 5230 for (j=1; j<bs; j++) { 5231 if (node != idx_R_local[bs*i+j]-j) { 5232 is_blocked = PETSC_FALSE; 5233 break; 5234 } 5235 } 5236 } 5237 } 5238 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5239 vbs = bs; 5240 for (i=0;i<n_R/vbs;i++) { 5241 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5242 } 5243 } 5244 } 5245 PetscCall(ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local)); 5246 if (sub_schurs && sub_schurs->reuse_solver) { 5247 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5248 5249 PetscCall(ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5250 PetscCall(ISDestroy(&reuse_solver->is_R)); 5251 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5252 reuse_solver->is_R = pcbddc->is_R_local; 5253 } else { 5254 PetscCall(PetscFree(idx_R_local)); 5255 } 5256 5257 /* print some info if requested */ 5258 if (pcbddc->dbg_flag) { 5259 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5260 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5261 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5262 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank)); 5263 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n",pcis->n,n_D,n_B)); 5264 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size)); 5265 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5266 } 5267 5268 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5269 if (!sub_schurs || !sub_schurs->reuse_solver) { 5270 IS is_aux1,is_aux2; 5271 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5272 5273 PetscCall(ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5274 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array1)); 5275 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array2)); 5276 PetscCall(ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5277 for (i=0; i<n_D; i++) { 5278 PetscCall(PetscBTSet(bitmask,is_indices[i])); 5279 } 5280 PetscCall(ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5281 for (i=0, j=0; i<n_R; i++) { 5282 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5283 aux_array1[j++] = i; 5284 } 5285 } 5286 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5287 PetscCall(ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5288 for (i=0, j=0; i<n_B; i++) { 5289 if (!PetscBTLookup(bitmask,is_indices[i])) { 5290 aux_array2[j++] = i; 5291 } 5292 } 5293 PetscCall(ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5294 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2)); 5295 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B)); 5296 PetscCall(ISDestroy(&is_aux1)); 5297 PetscCall(ISDestroy(&is_aux2)); 5298 5299 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5300 PetscCall(PetscMalloc1(n_D,&aux_array1)); 5301 for (i=0, j=0; i<n_R; i++) { 5302 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5303 aux_array1[j++] = i; 5304 } 5305 } 5306 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5307 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5308 PetscCall(ISDestroy(&is_aux1)); 5309 } 5310 PetscCall(PetscBTDestroy(&bitmask)); 5311 PetscCall(ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5312 } else { 5313 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5314 IS tis; 5315 PetscInt schur_size; 5316 5317 PetscCall(ISGetLocalSize(reuse_solver->is_B,&schur_size)); 5318 PetscCall(ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis)); 5319 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B)); 5320 PetscCall(ISDestroy(&tis)); 5321 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5322 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis)); 5323 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5324 PetscCall(ISDestroy(&tis)); 5325 } 5326 } 5327 PetscFunctionReturn(0); 5328 } 5329 5330 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5331 { 5332 MatNullSpace NullSpace; 5333 Mat dmat; 5334 const Vec *nullvecs; 5335 Vec v,v2,*nullvecs2; 5336 VecScatter sct = NULL; 5337 PetscContainer c; 5338 PetscScalar *ddata; 5339 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5340 PetscBool nnsp_has_cnst; 5341 5342 PetscFunctionBegin; 5343 if (!is && !B) { /* MATIS */ 5344 Mat_IS* matis = (Mat_IS*)A->data; 5345 5346 if (!B) { 5347 PetscCall(MatISGetLocalMat(A,&B)); 5348 } 5349 sct = matis->cctx; 5350 PetscCall(PetscObjectReference((PetscObject)sct)); 5351 } else { 5352 PetscCall(MatGetNullSpace(B,&NullSpace)); 5353 if (!NullSpace) { 5354 PetscCall(MatGetNearNullSpace(B,&NullSpace)); 5355 } 5356 if (NullSpace) PetscFunctionReturn(0); 5357 } 5358 PetscCall(MatGetNullSpace(A,&NullSpace)); 5359 if (!NullSpace) { 5360 PetscCall(MatGetNearNullSpace(A,&NullSpace)); 5361 } 5362 if (!NullSpace) PetscFunctionReturn(0); 5363 5364 PetscCall(MatCreateVecs(A,&v,NULL)); 5365 PetscCall(MatCreateVecs(B,&v2,NULL)); 5366 if (!sct) { 5367 PetscCall(VecScatterCreate(v,is,v2,NULL,&sct)); 5368 } 5369 PetscCall(MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs)); 5370 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5371 PetscCall(PetscMalloc1(bsiz,&nullvecs2)); 5372 PetscCall(VecGetBlockSize(v2,&bs)); 5373 PetscCall(VecGetSize(v2,&N)); 5374 PetscCall(VecGetLocalSize(v2,&n)); 5375 PetscCall(PetscMalloc1(n*bsiz,&ddata)); 5376 for (k=0;k<nnsp_size;k++) { 5377 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k])); 5378 PetscCall(VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5379 PetscCall(VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5380 } 5381 if (nnsp_has_cnst) { 5382 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size])); 5383 PetscCall(VecSet(nullvecs2[nnsp_size],1.0)); 5384 } 5385 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2)); 5386 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace)); 5387 5388 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat)); 5389 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B),&c)); 5390 PetscCall(PetscContainerSetPointer(c,ddata)); 5391 PetscCall(PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault)); 5392 PetscCall(PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c)); 5393 PetscCall(PetscContainerDestroy(&c)); 5394 PetscCall(PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat)); 5395 PetscCall(MatDestroy(&dmat)); 5396 5397 for (k=0;k<bsiz;k++) { 5398 PetscCall(VecDestroy(&nullvecs2[k])); 5399 } 5400 PetscCall(PetscFree(nullvecs2)); 5401 PetscCall(MatSetNearNullSpace(B,NullSpace)); 5402 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5403 PetscCall(VecDestroy(&v)); 5404 PetscCall(VecDestroy(&v2)); 5405 PetscCall(VecScatterDestroy(&sct)); 5406 PetscFunctionReturn(0); 5407 } 5408 5409 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5410 { 5411 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5412 PC_IS *pcis = (PC_IS*)pc->data; 5413 PC pc_temp; 5414 Mat A_RR; 5415 MatNullSpace nnsp; 5416 MatReuse reuse; 5417 PetscScalar m_one = -1.0; 5418 PetscReal value; 5419 PetscInt n_D,n_R; 5420 PetscBool issbaij,opts; 5421 void (*f)(void) = NULL; 5422 char dir_prefix[256],neu_prefix[256],str_level[16]; 5423 size_t len; 5424 5425 PetscFunctionBegin; 5426 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5427 /* approximate solver, propagate NearNullSpace if needed */ 5428 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5429 MatNullSpace gnnsp1,gnnsp2; 5430 PetscBool lhas,ghas; 5431 5432 PetscCall(MatGetNearNullSpace(pcbddc->local_mat,&nnsp)); 5433 PetscCall(MatGetNearNullSpace(pc->pmat,&gnnsp1)); 5434 PetscCall(MatGetNullSpace(pc->pmat,&gnnsp2)); 5435 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5436 PetscCall(MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 5437 if (!ghas && (gnnsp1 || gnnsp2)) { 5438 PetscCall(MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL)); 5439 } 5440 } 5441 5442 /* compute prefixes */ 5443 PetscCall(PetscStrcpy(dir_prefix,"")); 5444 PetscCall(PetscStrcpy(neu_prefix,"")); 5445 if (!pcbddc->current_level) { 5446 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix))); 5447 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix))); 5448 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5449 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5450 } else { 5451 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 5452 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 5453 len -= 15; /* remove "pc_bddc_coarse_" */ 5454 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5455 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5456 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5457 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1)); 5458 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1)); 5459 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5460 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5461 PetscCall(PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix))); 5462 PetscCall(PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix))); 5463 } 5464 5465 /* DIRICHLET PROBLEM */ 5466 if (dirichlet) { 5467 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5468 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5469 PetscCheck(sub_schurs && sub_schurs->reuse_solver,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5470 if (pcbddc->dbg_flag) { 5471 Mat A_IIn; 5472 5473 PetscCall(PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn)); 5474 PetscCall(MatDestroy(&pcis->A_II)); 5475 pcis->A_II = A_IIn; 5476 } 5477 } 5478 if (pcbddc->local_mat->symmetric_set) PetscCall(MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric)); 5479 /* Matrix for Dirichlet problem is pcis->A_II */ 5480 n_D = pcis->n - pcis->n_B; 5481 opts = PETSC_FALSE; 5482 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5483 opts = PETSC_TRUE; 5484 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D)); 5485 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1)); 5486 /* default */ 5487 PetscCall(KSPSetType(pcbddc->ksp_D,KSPPREONLY)); 5488 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix)); 5489 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij)); 5490 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5491 if (issbaij) { 5492 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5493 } else { 5494 PetscCall(PCSetType(pc_temp,PCLU)); 5495 } 5496 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure)); 5497 } 5498 PetscCall(MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix)); 5499 PetscCall(KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II)); 5500 /* Allow user's customization */ 5501 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5502 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5503 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5504 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II)); 5505 } 5506 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5507 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5508 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5509 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5510 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5511 const PetscInt *idxs; 5512 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5513 5514 PetscCall(ISGetLocalSize(pcis->is_I_local,&nl)); 5515 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 5516 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5517 for (i=0;i<nl;i++) { 5518 for (d=0;d<cdim;d++) { 5519 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5520 } 5521 } 5522 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 5523 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5524 PetscCall(PetscFree(scoords)); 5525 } 5526 if (sub_schurs && sub_schurs->reuse_solver) { 5527 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5528 5529 PetscCall(KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver)); 5530 } 5531 5532 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5533 if (!n_D) { 5534 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5535 PetscCall(PCSetType(pc_temp,PCNONE)); 5536 } 5537 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5538 /* set ksp_D into pcis data */ 5539 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5540 PetscCall(KSPDestroy(&pcis->ksp_D)); 5541 pcis->ksp_D = pcbddc->ksp_D; 5542 } 5543 5544 /* NEUMANN PROBLEM */ 5545 A_RR = NULL; 5546 if (neumann) { 5547 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5548 PetscInt ibs,mbs; 5549 PetscBool issbaij, reuse_neumann_solver; 5550 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5551 5552 reuse_neumann_solver = PETSC_FALSE; 5553 if (sub_schurs && sub_schurs->reuse_solver) { 5554 IS iP; 5555 5556 reuse_neumann_solver = PETSC_TRUE; 5557 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP)); 5558 if (iP) reuse_neumann_solver = PETSC_FALSE; 5559 } 5560 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5561 PetscCall(ISGetSize(pcbddc->is_R_local,&n_R)); 5562 if (pcbddc->ksp_R) { /* already created ksp */ 5563 PetscInt nn_R; 5564 PetscCall(KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR)); 5565 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5566 PetscCall(MatGetSize(A_RR,&nn_R,NULL)); 5567 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5568 PetscCall(KSPReset(pcbddc->ksp_R)); 5569 PetscCall(MatDestroy(&A_RR)); 5570 reuse = MAT_INITIAL_MATRIX; 5571 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5572 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5573 PetscCall(MatDestroy(&A_RR)); 5574 reuse = MAT_INITIAL_MATRIX; 5575 } else { /* safe to reuse the matrix */ 5576 reuse = MAT_REUSE_MATRIX; 5577 } 5578 } 5579 /* last check */ 5580 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5581 PetscCall(MatDestroy(&A_RR)); 5582 reuse = MAT_INITIAL_MATRIX; 5583 } 5584 } else { /* first time, so we need to create the matrix */ 5585 reuse = MAT_INITIAL_MATRIX; 5586 } 5587 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5588 TODO: Get Rid of these conversions */ 5589 PetscCall(MatGetBlockSize(pcbddc->local_mat,&mbs)); 5590 PetscCall(ISGetBlockSize(pcbddc->is_R_local,&ibs)); 5591 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij)); 5592 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5593 if (matis->A == pcbddc->local_mat) { 5594 PetscCall(MatDestroy(&pcbddc->local_mat)); 5595 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5596 } else { 5597 PetscCall(MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5598 } 5599 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5600 if (matis->A == pcbddc->local_mat) { 5601 PetscCall(MatDestroy(&pcbddc->local_mat)); 5602 PetscCall(MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5603 } else { 5604 PetscCall(MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5605 } 5606 } 5607 /* extract A_RR */ 5608 if (reuse_neumann_solver) { 5609 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5610 5611 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5612 PetscCall(MatDestroy(&A_RR)); 5613 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5614 PetscCall(PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR)); 5615 } else { 5616 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR)); 5617 } 5618 } else { 5619 PetscCall(MatDestroy(&A_RR)); 5620 PetscCall(PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL)); 5621 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5622 } 5623 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5624 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR)); 5625 } 5626 if (pcbddc->local_mat->symmetric_set) PetscCall(MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric)); 5627 opts = PETSC_FALSE; 5628 if (!pcbddc->ksp_R) { /* create object if not present */ 5629 opts = PETSC_TRUE; 5630 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R)); 5631 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1)); 5632 /* default */ 5633 PetscCall(KSPSetType(pcbddc->ksp_R,KSPPREONLY)); 5634 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix)); 5635 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5636 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij)); 5637 if (issbaij) { 5638 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5639 } else { 5640 PetscCall(PCSetType(pc_temp,PCLU)); 5641 } 5642 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure)); 5643 } 5644 PetscCall(KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR)); 5645 PetscCall(MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix)); 5646 if (opts) { /* Allow user's customization once */ 5647 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5648 } 5649 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5650 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5651 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR)); 5652 } 5653 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5654 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5655 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5656 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5657 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5658 const PetscInt *idxs; 5659 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5660 5661 PetscCall(ISGetLocalSize(pcbddc->is_R_local,&nl)); 5662 PetscCall(ISGetIndices(pcbddc->is_R_local,&idxs)); 5663 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5664 for (i=0;i<nl;i++) { 5665 for (d=0;d<cdim;d++) { 5666 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5667 } 5668 } 5669 PetscCall(ISRestoreIndices(pcbddc->is_R_local,&idxs)); 5670 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5671 PetscCall(PetscFree(scoords)); 5672 } 5673 5674 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5675 if (!n_R) { 5676 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5677 PetscCall(PCSetType(pc_temp,PCNONE)); 5678 } 5679 /* Reuse solver if it is present */ 5680 if (reuse_neumann_solver) { 5681 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5682 5683 PetscCall(KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver)); 5684 } 5685 PetscCall(KSPSetUp(pcbddc->ksp_R)); 5686 } 5687 5688 if (pcbddc->dbg_flag) { 5689 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5690 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5691 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5692 } 5693 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5694 5695 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5696 if (pcbddc->NullSpace_corr[0]) { 5697 PetscCall(PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE)); 5698 } 5699 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5700 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1])); 5701 } 5702 if (neumann && pcbddc->NullSpace_corr[2]) { 5703 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3])); 5704 } 5705 /* check Dirichlet and Neumann solvers */ 5706 if (pcbddc->dbg_flag) { 5707 if (dirichlet) { /* Dirichlet */ 5708 PetscCall(VecSetRandom(pcis->vec1_D,NULL)); 5709 PetscCall(MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D)); 5710 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D)); 5711 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 5712 PetscCall(VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D)); 5713 PetscCall(VecNorm(pcis->vec1_D,NORM_INFINITY,&value)); 5714 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,(double)value)); 5715 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5716 } 5717 if (neumann) { /* Neumann */ 5718 PetscCall(VecSetRandom(pcbddc->vec1_R,NULL)); 5719 PetscCall(MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R)); 5720 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R)); 5721 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 5722 PetscCall(VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R)); 5723 PetscCall(VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value)); 5724 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,(double)value)); 5725 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5726 } 5727 } 5728 /* free Neumann problem's matrix */ 5729 PetscCall(MatDestroy(&A_RR)); 5730 PetscFunctionReturn(0); 5731 } 5732 5733 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5734 { 5735 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5736 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5737 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5738 5739 PetscFunctionBegin; 5740 if (!reuse_solver) { 5741 PetscCall(VecSet(pcbddc->vec1_R,0.)); 5742 } 5743 if (!pcbddc->switch_static) { 5744 if (applytranspose && pcbddc->local_auxmat1) { 5745 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C)); 5746 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5747 } 5748 if (!reuse_solver) { 5749 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5750 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5751 } else { 5752 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5753 5754 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5755 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5756 } 5757 } else { 5758 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5759 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5760 PetscCall(VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5761 PetscCall(VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5762 if (applytranspose && pcbddc->local_auxmat1) { 5763 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C)); 5764 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5765 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5766 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5767 } 5768 } 5769 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5770 if (!reuse_solver || pcbddc->switch_static) { 5771 if (applytranspose) { 5772 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5773 } else { 5774 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5775 } 5776 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R)); 5777 } else { 5778 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5779 5780 if (applytranspose) { 5781 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5782 } else { 5783 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5784 } 5785 } 5786 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5787 PetscCall(VecSet(inout_B,0.)); 5788 if (!pcbddc->switch_static) { 5789 if (!reuse_solver) { 5790 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5791 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5792 } else { 5793 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5794 5795 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5796 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5797 } 5798 if (!applytranspose && pcbddc->local_auxmat1) { 5799 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5800 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B)); 5801 } 5802 } else { 5803 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5804 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5805 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5806 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5807 if (!applytranspose && pcbddc->local_auxmat1) { 5808 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5809 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R)); 5810 } 5811 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5812 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5813 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5814 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5815 } 5816 PetscFunctionReturn(0); 5817 } 5818 5819 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5820 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5821 { 5822 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5823 PC_IS* pcis = (PC_IS*) (pc->data); 5824 const PetscScalar zero = 0.0; 5825 5826 PetscFunctionBegin; 5827 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5828 if (!pcbddc->benign_apply_coarse_only) { 5829 if (applytranspose) { 5830 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P)); 5831 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5832 } else { 5833 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P)); 5834 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5835 } 5836 } else { 5837 PetscCall(VecSet(pcbddc->vec1_P,zero)); 5838 } 5839 5840 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5841 if (pcbddc->benign_n) { 5842 PetscScalar *array; 5843 PetscInt j; 5844 5845 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5846 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5847 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5848 } 5849 5850 /* start communications from local primal nodes to rhs of coarse solver */ 5851 PetscCall(VecSet(pcbddc->coarse_vec,zero)); 5852 PetscCall(PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD)); 5853 PetscCall(PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD)); 5854 5855 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5856 if (pcbddc->coarse_ksp) { 5857 Mat coarse_mat; 5858 Vec rhs,sol; 5859 MatNullSpace nullsp; 5860 PetscBool isbddc = PETSC_FALSE; 5861 5862 if (pcbddc->benign_have_null) { 5863 PC coarse_pc; 5864 5865 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5866 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 5867 /* we need to propagate to coarser levels the need for a possible benign correction */ 5868 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5869 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5870 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5871 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5872 } 5873 } 5874 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&rhs)); 5875 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&sol)); 5876 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 5877 if (applytranspose) { 5878 PetscCheck(!pcbddc->benign_apply_coarse_only,PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5879 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5880 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol)); 5881 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5882 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5883 PetscCall(MatGetTransposeNullSpace(coarse_mat,&nullsp)); 5884 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp,sol)); 5885 } else { 5886 PetscCall(MatGetNullSpace(coarse_mat,&nullsp)); 5887 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5888 PC coarse_pc; 5889 5890 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp,rhs)); 5891 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5892 PetscCall(PCPreSolve(coarse_pc,pcbddc->coarse_ksp)); 5893 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol)); 5894 PetscCall(PCPostSolve(coarse_pc,pcbddc->coarse_ksp)); 5895 } else { 5896 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5897 PetscCall(KSPSolve(pcbddc->coarse_ksp,rhs,sol)); 5898 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5899 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5900 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp,sol)); 5901 } 5902 } 5903 /* we don't need the benign correction at coarser levels anymore */ 5904 if (pcbddc->benign_have_null && isbddc) { 5905 PC coarse_pc; 5906 PC_BDDC* coarsepcbddc; 5907 5908 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5909 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5910 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5911 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5912 } 5913 } 5914 5915 /* Local solution on R nodes */ 5916 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5917 PetscCall(PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose)); 5918 } 5919 /* communications from coarse sol to local primal nodes */ 5920 PetscCall(PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE)); 5921 PetscCall(PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE)); 5922 5923 /* Sum contributions from the two levels */ 5924 if (!pcbddc->benign_apply_coarse_only) { 5925 if (applytranspose) { 5926 PetscCall(MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5927 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5928 } else { 5929 PetscCall(MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5930 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5931 } 5932 /* store p0 */ 5933 if (pcbddc->benign_n) { 5934 PetscScalar *array; 5935 PetscInt j; 5936 5937 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5938 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5939 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5940 } 5941 } else { /* expand the coarse solution */ 5942 if (applytranspose) { 5943 PetscCall(MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B)); 5944 } else { 5945 PetscCall(MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B)); 5946 } 5947 } 5948 PetscFunctionReturn(0); 5949 } 5950 5951 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5952 { 5953 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5954 Vec from,to; 5955 const PetscScalar *array; 5956 5957 PetscFunctionBegin; 5958 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5959 from = pcbddc->coarse_vec; 5960 to = pcbddc->vec1_P; 5961 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5962 Vec tvec; 5963 5964 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 5965 PetscCall(VecResetArray(tvec)); 5966 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&tvec)); 5967 PetscCall(VecGetArrayRead(tvec,&array)); 5968 PetscCall(VecPlaceArray(from,array)); 5969 PetscCall(VecRestoreArrayRead(tvec,&array)); 5970 } 5971 } else { /* from local to global -> put data in coarse right hand side */ 5972 from = pcbddc->vec1_P; 5973 to = pcbddc->coarse_vec; 5974 } 5975 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 5976 PetscFunctionReturn(0); 5977 } 5978 5979 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5980 { 5981 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5982 Vec from,to; 5983 const PetscScalar *array; 5984 5985 PetscFunctionBegin; 5986 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5987 from = pcbddc->coarse_vec; 5988 to = pcbddc->vec1_P; 5989 } else { /* from local to global -> put data in coarse right hand side */ 5990 from = pcbddc->vec1_P; 5991 to = pcbddc->coarse_vec; 5992 } 5993 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 5994 if (smode == SCATTER_FORWARD) { 5995 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5996 Vec tvec; 5997 5998 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 5999 PetscCall(VecGetArrayRead(to,&array)); 6000 PetscCall(VecPlaceArray(tvec,array)); 6001 PetscCall(VecRestoreArrayRead(to,&array)); 6002 } 6003 } else { 6004 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6005 PetscCall(VecResetArray(from)); 6006 } 6007 } 6008 PetscFunctionReturn(0); 6009 } 6010 6011 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6012 { 6013 PC_IS* pcis = (PC_IS*)(pc->data); 6014 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6015 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6016 /* one and zero */ 6017 PetscScalar one=1.0,zero=0.0; 6018 /* space to store constraints and their local indices */ 6019 PetscScalar *constraints_data; 6020 PetscInt *constraints_idxs,*constraints_idxs_B; 6021 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6022 PetscInt *constraints_n; 6023 /* iterators */ 6024 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6025 /* BLAS integers */ 6026 PetscBLASInt lwork,lierr; 6027 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6028 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6029 /* reuse */ 6030 PetscInt olocal_primal_size,olocal_primal_size_cc; 6031 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6032 /* change of basis */ 6033 PetscBool qr_needed; 6034 PetscBT change_basis,qr_needed_idx; 6035 /* auxiliary stuff */ 6036 PetscInt *nnz,*is_indices; 6037 PetscInt ncc; 6038 /* some quantities */ 6039 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6040 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6041 PetscReal tol; /* tolerance for retaining eigenmodes */ 6042 6043 PetscFunctionBegin; 6044 tol = PetscSqrtReal(PETSC_SMALL); 6045 /* Destroy Mat objects computed previously */ 6046 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6047 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6048 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6049 /* save info on constraints from previous setup (if any) */ 6050 olocal_primal_size = pcbddc->local_primal_size; 6051 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6052 PetscCall(PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult)); 6053 PetscCall(PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc)); 6054 PetscCall(PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc)); 6055 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 6056 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6057 6058 if (!pcbddc->adaptive_selection) { 6059 IS ISForVertices,*ISForFaces,*ISForEdges; 6060 MatNullSpace nearnullsp; 6061 const Vec *nearnullvecs; 6062 Vec *localnearnullsp; 6063 PetscScalar *array; 6064 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size,o_nf,o_ne; 6065 PetscBool nnsp_has_cnst; 6066 /* LAPACK working arrays for SVD or POD */ 6067 PetscBool skip_lapack,boolforchange; 6068 PetscScalar *work; 6069 PetscReal *singular_vals; 6070 #if defined(PETSC_USE_COMPLEX) 6071 PetscReal *rwork; 6072 #endif 6073 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6074 PetscBLASInt dummy_int=1; 6075 PetscScalar dummy_scalar=1.; 6076 PetscBool use_pod = PETSC_FALSE; 6077 6078 /* MKL SVD with same input gives different results on different processes! */ 6079 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6080 use_pod = PETSC_TRUE; 6081 #endif 6082 /* Get index sets for faces, edges and vertices from graph */ 6083 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices)); 6084 o_nf = n_ISForFaces; 6085 o_ne = n_ISForEdges; 6086 n_vertices = 0; 6087 if (ISForVertices) PetscCall(ISGetSize(ISForVertices,&n_vertices)); 6088 /* print some info */ 6089 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6090 6091 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 6092 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 6093 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6094 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6095 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n",PetscGlobalRank,n_vertices,pcbddc->use_vertices)); 6096 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges)); 6097 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces)); 6098 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6099 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6100 } 6101 6102 if (!pcbddc->use_vertices) n_vertices = 0; 6103 if (!pcbddc->use_edges) n_ISForEdges = 0; 6104 if (!pcbddc->use_faces) n_ISForFaces = 0; 6105 6106 /* check if near null space is attached to global mat */ 6107 if (pcbddc->use_nnsp) { 6108 PetscCall(MatGetNearNullSpace(pc->pmat,&nearnullsp)); 6109 } else nearnullsp = NULL; 6110 6111 if (nearnullsp) { 6112 PetscCall(MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs)); 6113 /* remove any stored info */ 6114 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6115 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6116 /* store information for BDDC solver reuse */ 6117 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6118 pcbddc->onearnullspace = nearnullsp; 6119 PetscCall(PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state)); 6120 for (i=0;i<nnsp_size;i++) { 6121 PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i])); 6122 } 6123 } else { /* if near null space is not provided BDDC uses constants by default */ 6124 nnsp_size = 0; 6125 nnsp_has_cnst = PETSC_TRUE; 6126 } 6127 /* get max number of constraints on a single cc */ 6128 max_constraints = nnsp_size; 6129 if (nnsp_has_cnst) max_constraints++; 6130 6131 /* 6132 Evaluate maximum storage size needed by the procedure 6133 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6134 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6135 There can be multiple constraints per connected component 6136 */ 6137 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6138 PetscCall(PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n)); 6139 6140 total_counts = n_ISForFaces+n_ISForEdges; 6141 total_counts *= max_constraints; 6142 total_counts += n_vertices; 6143 PetscCall(PetscBTCreate(total_counts,&change_basis)); 6144 6145 total_counts = 0; 6146 max_size_of_constraint = 0; 6147 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6148 IS used_is; 6149 if (i<n_ISForEdges) { 6150 used_is = ISForEdges[i]; 6151 } else { 6152 used_is = ISForFaces[i-n_ISForEdges]; 6153 } 6154 PetscCall(ISGetSize(used_is,&j)); 6155 total_counts += j; 6156 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6157 } 6158 PetscCall(PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B)); 6159 6160 /* get local part of global near null space vectors */ 6161 PetscCall(PetscMalloc1(nnsp_size,&localnearnullsp)); 6162 for (k=0;k<nnsp_size;k++) { 6163 PetscCall(VecDuplicate(pcis->vec1_N,&localnearnullsp[k])); 6164 PetscCall(VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6165 PetscCall(VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6166 } 6167 6168 /* whether or not to skip lapack calls */ 6169 skip_lapack = PETSC_TRUE; 6170 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6171 6172 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6173 if (!skip_lapack) { 6174 PetscScalar temp_work; 6175 6176 if (use_pod) { 6177 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6178 PetscCall(PetscMalloc1(max_constraints*max_constraints,&correlation_mat)); 6179 PetscCall(PetscMalloc1(max_constraints,&singular_vals)); 6180 PetscCall(PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis)); 6181 #if defined(PETSC_USE_COMPLEX) 6182 PetscCall(PetscMalloc1(3*max_constraints,&rwork)); 6183 #endif 6184 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6185 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6186 PetscCall(PetscBLASIntCast(max_constraints,&Blas_LDA)); 6187 lwork = -1; 6188 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6189 #if !defined(PETSC_USE_COMPLEX) 6190 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6191 #else 6192 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6193 #endif 6194 PetscCall(PetscFPTrapPop()); 6195 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6196 } else { 6197 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6198 /* SVD */ 6199 PetscInt max_n,min_n; 6200 max_n = max_size_of_constraint; 6201 min_n = max_constraints; 6202 if (max_size_of_constraint < max_constraints) { 6203 min_n = max_size_of_constraint; 6204 max_n = max_constraints; 6205 } 6206 PetscCall(PetscMalloc1(min_n,&singular_vals)); 6207 #if defined(PETSC_USE_COMPLEX) 6208 PetscCall(PetscMalloc1(5*min_n,&rwork)); 6209 #endif 6210 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6211 lwork = -1; 6212 PetscCall(PetscBLASIntCast(max_n,&Blas_M)); 6213 PetscCall(PetscBLASIntCast(min_n,&Blas_N)); 6214 PetscCall(PetscBLASIntCast(max_n,&Blas_LDA)); 6215 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6216 #if !defined(PETSC_USE_COMPLEX) 6217 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6218 #else 6219 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6220 #endif 6221 PetscCall(PetscFPTrapPop()); 6222 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6223 #else 6224 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6225 #endif /* on missing GESVD */ 6226 } 6227 /* Allocate optimal workspace */ 6228 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork)); 6229 PetscCall(PetscMalloc1(lwork,&work)); 6230 } 6231 /* Now we can loop on constraining sets */ 6232 total_counts = 0; 6233 constraints_idxs_ptr[0] = 0; 6234 constraints_data_ptr[0] = 0; 6235 /* vertices */ 6236 if (n_vertices) { 6237 PetscCall(ISGetIndices(ISForVertices,(const PetscInt**)&is_indices)); 6238 PetscCall(PetscArraycpy(constraints_idxs,is_indices,n_vertices)); 6239 for (i=0;i<n_vertices;i++) { 6240 constraints_n[total_counts] = 1; 6241 constraints_data[total_counts] = 1.0; 6242 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6243 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6244 total_counts++; 6245 } 6246 PetscCall(ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices)); 6247 } 6248 6249 /* edges and faces */ 6250 total_counts_cc = total_counts; 6251 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6252 IS used_is; 6253 PetscBool idxs_copied = PETSC_FALSE; 6254 6255 if (ncc<n_ISForEdges) { 6256 used_is = ISForEdges[ncc]; 6257 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6258 } else { 6259 used_is = ISForFaces[ncc-n_ISForEdges]; 6260 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6261 } 6262 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6263 6264 PetscCall(ISGetSize(used_is,&size_of_constraint)); 6265 if (!size_of_constraint) continue; 6266 PetscCall(ISGetIndices(used_is,(const PetscInt**)&is_indices)); 6267 /* change of basis should not be performed on local periodic nodes */ 6268 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6269 if (nnsp_has_cnst) { 6270 PetscScalar quad_value; 6271 6272 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6273 idxs_copied = PETSC_TRUE; 6274 6275 if (!pcbddc->use_nnsp_true) { 6276 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6277 } else { 6278 quad_value = 1.0; 6279 } 6280 for (j=0;j<size_of_constraint;j++) { 6281 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6282 } 6283 temp_constraints++; 6284 total_counts++; 6285 } 6286 for (k=0;k<nnsp_size;k++) { 6287 PetscReal real_value; 6288 PetscScalar *ptr_to_data; 6289 6290 PetscCall(VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6291 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6292 for (j=0;j<size_of_constraint;j++) { 6293 ptr_to_data[j] = array[is_indices[j]]; 6294 } 6295 PetscCall(VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6296 /* check if array is null on the connected component */ 6297 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6298 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6299 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6300 temp_constraints++; 6301 total_counts++; 6302 if (!idxs_copied) { 6303 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6304 idxs_copied = PETSC_TRUE; 6305 } 6306 } 6307 } 6308 PetscCall(ISRestoreIndices(used_is,(const PetscInt**)&is_indices)); 6309 valid_constraints = temp_constraints; 6310 if (!pcbddc->use_nnsp_true && temp_constraints) { 6311 if (temp_constraints == 1) { /* just normalize the constraint */ 6312 PetscScalar norm,*ptr_to_data; 6313 6314 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6315 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6316 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6317 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6318 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6319 } else { /* perform SVD */ 6320 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6321 6322 if (use_pod) { 6323 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6324 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6325 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6326 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6327 from that computed using LAPACKgesvd 6328 -> This is due to a different computation of eigenvectors in LAPACKheev 6329 -> The quality of the POD-computed basis will be the same */ 6330 PetscCall(PetscArrayzero(correlation_mat,temp_constraints*temp_constraints)); 6331 /* Store upper triangular part of correlation matrix */ 6332 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6333 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6334 for (j=0;j<temp_constraints;j++) { 6335 for (k=0;k<j+1;k++) { 6336 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6337 } 6338 } 6339 /* compute eigenvalues and eigenvectors of correlation matrix */ 6340 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6341 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDA)); 6342 #if !defined(PETSC_USE_COMPLEX) 6343 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6344 #else 6345 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6346 #endif 6347 PetscCall(PetscFPTrapPop()); 6348 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6349 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6350 j = 0; 6351 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6352 total_counts = total_counts-j; 6353 valid_constraints = temp_constraints-j; 6354 /* scale and copy POD basis into used quadrature memory */ 6355 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6356 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6357 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_K)); 6358 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6359 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDB)); 6360 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6361 if (j<temp_constraints) { 6362 PetscInt ii; 6363 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6364 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6365 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6366 PetscCall(PetscFPTrapPop()); 6367 for (k=0;k<temp_constraints-j;k++) { 6368 for (ii=0;ii<size_of_constraint;ii++) { 6369 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6370 } 6371 } 6372 } 6373 } else { 6374 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6375 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6376 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6377 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6378 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6379 #if !defined(PETSC_USE_COMPLEX) 6380 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6381 #else 6382 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6383 #endif 6384 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6385 PetscCall(PetscFPTrapPop()); 6386 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6387 k = temp_constraints; 6388 if (k > size_of_constraint) k = size_of_constraint; 6389 j = 0; 6390 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6391 valid_constraints = k-j; 6392 total_counts = total_counts-temp_constraints+valid_constraints; 6393 #else 6394 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6395 #endif /* on missing GESVD */ 6396 } 6397 } 6398 } 6399 /* update pointers information */ 6400 if (valid_constraints) { 6401 constraints_n[total_counts_cc] = valid_constraints; 6402 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6403 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6404 /* set change_of_basis flag */ 6405 if (boolforchange) { 6406 PetscBTSet(change_basis,total_counts_cc); 6407 } 6408 total_counts_cc++; 6409 } 6410 } 6411 /* free workspace */ 6412 if (!skip_lapack) { 6413 PetscCall(PetscFree(work)); 6414 #if defined(PETSC_USE_COMPLEX) 6415 PetscCall(PetscFree(rwork)); 6416 #endif 6417 PetscCall(PetscFree(singular_vals)); 6418 PetscCall(PetscFree(correlation_mat)); 6419 PetscCall(PetscFree(temp_basis)); 6420 } 6421 for (k=0;k<nnsp_size;k++) { 6422 PetscCall(VecDestroy(&localnearnullsp[k])); 6423 } 6424 PetscCall(PetscFree(localnearnullsp)); 6425 /* free index sets of faces, edges and vertices */ 6426 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,&o_nf,&ISForFaces,&o_ne,&ISForEdges,&ISForVertices)); 6427 } else { 6428 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6429 6430 total_counts = 0; 6431 n_vertices = 0; 6432 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6433 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 6434 } 6435 max_constraints = 0; 6436 total_counts_cc = 0; 6437 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6438 total_counts += pcbddc->adaptive_constraints_n[i]; 6439 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6440 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6441 } 6442 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6443 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6444 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6445 constraints_data = pcbddc->adaptive_constraints_data; 6446 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6447 PetscCall(PetscMalloc1(total_counts_cc,&constraints_n)); 6448 total_counts_cc = 0; 6449 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6450 if (pcbddc->adaptive_constraints_n[i]) { 6451 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6452 } 6453 } 6454 6455 max_size_of_constraint = 0; 6456 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6457 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B)); 6458 /* Change of basis */ 6459 PetscCall(PetscBTCreate(total_counts_cc,&change_basis)); 6460 if (pcbddc->use_change_of_basis) { 6461 for (i=0;i<sub_schurs->n_subs;i++) { 6462 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6463 PetscCall(PetscBTSet(change_basis,i+n_vertices)); 6464 } 6465 } 6466 } 6467 } 6468 pcbddc->local_primal_size = total_counts; 6469 PetscCall(PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs)); 6470 6471 /* map constraints_idxs in boundary numbering */ 6472 if (pcbddc->use_change_of_basis) { 6473 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B)); 6474 PetscCheck(i == constraints_idxs_ptr[total_counts_cc],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT,constraints_idxs_ptr[total_counts_cc],i); 6475 } 6476 6477 /* Create constraint matrix */ 6478 PetscCall(MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix)); 6479 PetscCall(MatSetType(pcbddc->ConstraintMatrix,MATAIJ)); 6480 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n)); 6481 6482 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6483 /* determine if a QR strategy is needed for change of basis */ 6484 qr_needed = pcbddc->use_qr_single; 6485 PetscCall(PetscBTCreate(total_counts_cc,&qr_needed_idx)); 6486 total_primal_vertices=0; 6487 pcbddc->local_primal_size_cc = 0; 6488 for (i=0;i<total_counts_cc;i++) { 6489 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6490 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6491 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6492 pcbddc->local_primal_size_cc += 1; 6493 } else if (PetscBTLookup(change_basis,i)) { 6494 for (k=0;k<constraints_n[i];k++) { 6495 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6496 } 6497 pcbddc->local_primal_size_cc += constraints_n[i]; 6498 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6499 PetscBTSet(qr_needed_idx,i); 6500 qr_needed = PETSC_TRUE; 6501 } 6502 } else { 6503 pcbddc->local_primal_size_cc += 1; 6504 } 6505 } 6506 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6507 pcbddc->n_vertices = total_primal_vertices; 6508 /* permute indices in order to have a sorted set of vertices */ 6509 PetscCall(PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs)); 6510 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult)); 6511 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices)); 6512 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6513 6514 /* nonzero structure of constraint matrix */ 6515 /* and get reference dof for local constraints */ 6516 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&nnz)); 6517 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6518 6519 j = total_primal_vertices; 6520 total_counts = total_primal_vertices; 6521 cum = total_primal_vertices; 6522 for (i=n_vertices;i<total_counts_cc;i++) { 6523 if (!PetscBTLookup(change_basis,i)) { 6524 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6525 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6526 cum++; 6527 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6528 for (k=0;k<constraints_n[i];k++) { 6529 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6530 nnz[j+k] = size_of_constraint; 6531 } 6532 j += constraints_n[i]; 6533 } 6534 } 6535 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz)); 6536 PetscCall(MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6537 PetscCall(PetscFree(nnz)); 6538 6539 /* set values in constraint matrix */ 6540 for (i=0;i<total_primal_vertices;i++) { 6541 PetscCall(MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES)); 6542 } 6543 total_counts = total_primal_vertices; 6544 for (i=n_vertices;i<total_counts_cc;i++) { 6545 if (!PetscBTLookup(change_basis,i)) { 6546 PetscInt *cols; 6547 6548 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6549 cols = constraints_idxs+constraints_idxs_ptr[i]; 6550 for (k=0;k<constraints_n[i];k++) { 6551 PetscInt row = total_counts+k; 6552 PetscScalar *vals; 6553 6554 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6555 PetscCall(MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES)); 6556 } 6557 total_counts += constraints_n[i]; 6558 } 6559 } 6560 /* assembling */ 6561 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6562 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6563 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view")); 6564 6565 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6566 if (pcbddc->use_change_of_basis) { 6567 /* dual and primal dofs on a single cc */ 6568 PetscInt dual_dofs,primal_dofs; 6569 /* working stuff for GEQRF */ 6570 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6571 PetscBLASInt lqr_work; 6572 /* working stuff for UNGQR */ 6573 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6574 PetscBLASInt lgqr_work; 6575 /* working stuff for TRTRS */ 6576 PetscScalar *trs_rhs = NULL; 6577 PetscBLASInt Blas_NRHS; 6578 /* pointers for values insertion into change of basis matrix */ 6579 PetscInt *start_rows,*start_cols; 6580 PetscScalar *start_vals; 6581 /* working stuff for values insertion */ 6582 PetscBT is_primal; 6583 PetscInt *aux_primal_numbering_B; 6584 /* matrix sizes */ 6585 PetscInt global_size,local_size; 6586 /* temporary change of basis */ 6587 Mat localChangeOfBasisMatrix; 6588 /* extra space for debugging */ 6589 PetscScalar *dbg_work = NULL; 6590 6591 PetscCall(MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix)); 6592 PetscCall(MatSetType(localChangeOfBasisMatrix,MATAIJ)); 6593 PetscCall(MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n)); 6594 /* nonzeros for local mat */ 6595 PetscCall(PetscMalloc1(pcis->n,&nnz)); 6596 if (!pcbddc->benign_change || pcbddc->fake_change) { 6597 for (i=0;i<pcis->n;i++) nnz[i]=1; 6598 } else { 6599 const PetscInt *ii; 6600 PetscInt n; 6601 PetscBool flg_row; 6602 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6603 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6604 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6605 } 6606 for (i=n_vertices;i<total_counts_cc;i++) { 6607 if (PetscBTLookup(change_basis,i)) { 6608 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6609 if (PetscBTLookup(qr_needed_idx,i)) { 6610 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6611 } else { 6612 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6613 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6614 } 6615 } 6616 } 6617 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz)); 6618 PetscCall(MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6619 PetscCall(PetscFree(nnz)); 6620 /* Set interior change in the matrix */ 6621 if (!pcbddc->benign_change || pcbddc->fake_change) { 6622 for (i=0;i<pcis->n;i++) { 6623 PetscCall(MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES)); 6624 } 6625 } else { 6626 const PetscInt *ii,*jj; 6627 PetscScalar *aa; 6628 PetscInt n; 6629 PetscBool flg_row; 6630 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6631 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change,&aa)); 6632 for (i=0;i<n;i++) { 6633 PetscCall(MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES)); 6634 } 6635 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change,&aa)); 6636 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6637 } 6638 6639 if (pcbddc->dbg_flag) { 6640 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6641 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank)); 6642 } 6643 6644 /* Now we loop on the constraints which need a change of basis */ 6645 /* 6646 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6647 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6648 6649 Basic blocks of change of basis matrix T computed: 6650 6651 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6652 6653 | 1 0 ... 0 s_1/S | 6654 | 0 1 ... 0 s_2/S | 6655 | ... | 6656 | 0 ... 1 s_{n-1}/S | 6657 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6658 6659 with S = \sum_{i=1}^n s_i^2 6660 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6661 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6662 6663 - QR decomposition of constraints otherwise 6664 */ 6665 if (qr_needed && max_size_of_constraint) { 6666 /* space to store Q */ 6667 PetscCall(PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis)); 6668 /* array to store scaling factors for reflectors */ 6669 PetscCall(PetscMalloc1(max_constraints,&qr_tau)); 6670 /* first we issue queries for optimal work */ 6671 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6672 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6673 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6674 lqr_work = -1; 6675 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6676 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6677 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work)); 6678 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work)); 6679 lgqr_work = -1; 6680 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6681 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_N)); 6682 PetscCall(PetscBLASIntCast(max_constraints,&Blas_K)); 6683 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6684 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6685 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6686 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6687 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work)); 6688 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work)); 6689 /* array to store rhs and solution of triangular solver */ 6690 PetscCall(PetscMalloc1(max_constraints*max_constraints,&trs_rhs)); 6691 /* allocating workspace for check */ 6692 if (pcbddc->dbg_flag) { 6693 PetscCall(PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work)); 6694 } 6695 } 6696 /* array to store whether a node is primal or not */ 6697 PetscCall(PetscBTCreate(pcis->n_B,&is_primal)); 6698 PetscCall(PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B)); 6699 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B)); 6700 PetscCheck(i == total_primal_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT,total_primal_vertices,i); 6701 for (i=0;i<total_primal_vertices;i++) { 6702 PetscCall(PetscBTSet(is_primal,aux_primal_numbering_B[i])); 6703 } 6704 PetscCall(PetscFree(aux_primal_numbering_B)); 6705 6706 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6707 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6708 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6709 if (PetscBTLookup(change_basis,total_counts)) { 6710 /* get constraint info */ 6711 primal_dofs = constraints_n[total_counts]; 6712 dual_dofs = size_of_constraint-primal_dofs; 6713 6714 if (pcbddc->dbg_flag) { 6715 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n",total_counts,primal_dofs,size_of_constraint)); 6716 } 6717 6718 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6719 6720 /* copy quadrature constraints for change of basis check */ 6721 if (pcbddc->dbg_flag) { 6722 PetscCall(PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6723 } 6724 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6725 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6726 6727 /* compute QR decomposition of constraints */ 6728 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6729 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6730 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6731 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6732 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6733 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6734 PetscCall(PetscFPTrapPop()); 6735 6736 /* explicitly compute R^-T */ 6737 PetscCall(PetscArrayzero(trs_rhs,primal_dofs*primal_dofs)); 6738 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6739 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6740 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_NRHS)); 6741 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6742 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6743 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6744 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6745 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6746 PetscCall(PetscFPTrapPop()); 6747 6748 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6749 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6750 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6751 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6752 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6753 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6754 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6755 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6756 PetscCall(PetscFPTrapPop()); 6757 6758 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6759 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6760 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6761 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6762 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6763 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6764 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6765 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6766 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6767 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6768 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6769 PetscCall(PetscFPTrapPop()); 6770 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6771 6772 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6773 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6774 /* insert cols for primal dofs */ 6775 for (j=0;j<primal_dofs;j++) { 6776 start_vals = &qr_basis[j*size_of_constraint]; 6777 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6778 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6779 } 6780 /* insert cols for dual dofs */ 6781 for (j=0,k=0;j<dual_dofs;k++) { 6782 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6783 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6784 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6785 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6786 j++; 6787 } 6788 } 6789 6790 /* check change of basis */ 6791 if (pcbddc->dbg_flag) { 6792 PetscInt ii,jj; 6793 PetscBool valid_qr=PETSC_TRUE; 6794 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_M)); 6795 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6796 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_K)); 6797 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6798 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDB)); 6799 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDC)); 6800 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6801 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6802 PetscCall(PetscFPTrapPop()); 6803 for (jj=0;jj<size_of_constraint;jj++) { 6804 for (ii=0;ii<primal_dofs;ii++) { 6805 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6806 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6807 } 6808 } 6809 if (!valid_qr) { 6810 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n")); 6811 for (jj=0;jj<size_of_constraint;jj++) { 6812 for (ii=0;ii<primal_dofs;ii++) { 6813 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6814 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n",jj,ii,(double)PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6815 } 6816 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6817 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n",jj,ii,(double)PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6818 } 6819 } 6820 } 6821 } else { 6822 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n")); 6823 } 6824 } 6825 } else { /* simple transformation block */ 6826 PetscInt row,col; 6827 PetscScalar val,norm; 6828 6829 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6830 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6831 for (j=0;j<size_of_constraint;j++) { 6832 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6833 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6834 if (!PetscBTLookup(is_primal,row_B)) { 6835 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6836 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES)); 6837 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES)); 6838 } else { 6839 for (k=0;k<size_of_constraint;k++) { 6840 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6841 if (row != col) { 6842 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6843 } else { 6844 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6845 } 6846 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES)); 6847 } 6848 } 6849 } 6850 if (pcbddc->dbg_flag) { 6851 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n")); 6852 } 6853 } 6854 } else { 6855 if (pcbddc->dbg_flag) { 6856 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n",total_counts,size_of_constraint)); 6857 } 6858 } 6859 } 6860 6861 /* free workspace */ 6862 if (qr_needed) { 6863 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 6864 PetscCall(PetscFree(trs_rhs)); 6865 PetscCall(PetscFree(qr_tau)); 6866 PetscCall(PetscFree(qr_work)); 6867 PetscCall(PetscFree(gqr_work)); 6868 PetscCall(PetscFree(qr_basis)); 6869 } 6870 PetscCall(PetscBTDestroy(&is_primal)); 6871 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6872 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6873 6874 /* assembling of global change of variable */ 6875 if (!pcbddc->fake_change) { 6876 Mat tmat; 6877 PetscInt bs; 6878 6879 PetscCall(VecGetSize(pcis->vec1_global,&global_size)); 6880 PetscCall(VecGetLocalSize(pcis->vec1_global,&local_size)); 6881 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat)); 6882 PetscCall(MatISSetLocalMat(tmat,localChangeOfBasisMatrix)); 6883 PetscCall(MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY)); 6884 PetscCall(MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY)); 6885 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix)); 6886 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ)); 6887 PetscCall(MatGetBlockSize(pc->pmat,&bs)); 6888 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs)); 6889 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size)); 6890 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE)); 6891 PetscCall(MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix)); 6892 PetscCall(MatDestroy(&tmat)); 6893 PetscCall(VecSet(pcis->vec1_global,0.0)); 6894 PetscCall(VecSet(pcis->vec1_N,1.0)); 6895 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6896 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6897 PetscCall(VecReciprocal(pcis->vec1_global)); 6898 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL)); 6899 6900 /* check */ 6901 if (pcbddc->dbg_flag) { 6902 PetscReal error; 6903 Vec x,x_change; 6904 6905 PetscCall(VecDuplicate(pcis->vec1_global,&x)); 6906 PetscCall(VecDuplicate(pcis->vec1_global,&x_change)); 6907 PetscCall(VecSetRandom(x,NULL)); 6908 PetscCall(VecCopy(x,pcis->vec1_global)); 6909 PetscCall(VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6910 PetscCall(VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6911 PetscCall(MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N)); 6912 PetscCall(VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6913 PetscCall(VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6914 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change)); 6915 PetscCall(VecAXPY(x,-1.0,x_change)); 6916 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 6917 if (error > PETSC_SMALL) { 6918 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",(double)error); 6919 } 6920 PetscCall(VecDestroy(&x)); 6921 PetscCall(VecDestroy(&x_change)); 6922 } 6923 /* adapt sub_schurs computed (if any) */ 6924 if (pcbddc->use_deluxe_scaling) { 6925 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6926 6927 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6928 if (sub_schurs && sub_schurs->S_Ej_all) { 6929 Mat S_new,tmat; 6930 IS is_all_N,is_V_Sall = NULL; 6931 6932 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N)); 6933 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat)); 6934 if (pcbddc->deluxe_zerorows) { 6935 ISLocalToGlobalMapping NtoSall; 6936 IS is_V; 6937 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V)); 6938 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall)); 6939 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall)); 6940 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 6941 PetscCall(ISDestroy(&is_V)); 6942 } 6943 PetscCall(ISDestroy(&is_all_N)); 6944 PetscCall(MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 6945 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 6946 PetscCall(PetscObjectReference((PetscObject)S_new)); 6947 if (pcbddc->deluxe_zerorows) { 6948 const PetscScalar *array; 6949 const PetscInt *idxs_V,*idxs_all; 6950 PetscInt i,n_V; 6951 6952 PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 6953 PetscCall(ISGetLocalSize(is_V_Sall,&n_V)); 6954 PetscCall(ISGetIndices(is_V_Sall,&idxs_V)); 6955 PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs_all)); 6956 PetscCall(VecGetArrayRead(pcis->D,&array)); 6957 for (i=0;i<n_V;i++) { 6958 PetscScalar val; 6959 PetscInt idx; 6960 6961 idx = idxs_V[i]; 6962 val = array[idxs_all[idxs_V[i]]]; 6963 PetscCall(MatSetValue(S_new,idx,idx,val,INSERT_VALUES)); 6964 } 6965 PetscCall(MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY)); 6966 PetscCall(MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY)); 6967 PetscCall(VecRestoreArrayRead(pcis->D,&array)); 6968 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all)); 6969 PetscCall(ISRestoreIndices(is_V_Sall,&idxs_V)); 6970 } 6971 sub_schurs->S_Ej_all = S_new; 6972 PetscCall(MatDestroy(&S_new)); 6973 if (sub_schurs->sum_S_Ej_all) { 6974 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 6975 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 6976 PetscCall(PetscObjectReference((PetscObject)S_new)); 6977 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 6978 sub_schurs->sum_S_Ej_all = S_new; 6979 PetscCall(MatDestroy(&S_new)); 6980 } 6981 PetscCall(ISDestroy(&is_V_Sall)); 6982 PetscCall(MatDestroy(&tmat)); 6983 } 6984 /* destroy any change of basis context in sub_schurs */ 6985 if (sub_schurs && sub_schurs->change) { 6986 PetscInt i; 6987 6988 for (i=0;i<sub_schurs->n_subs;i++) { 6989 PetscCall(KSPDestroy(&sub_schurs->change[i])); 6990 } 6991 PetscCall(PetscFree(sub_schurs->change)); 6992 } 6993 } 6994 if (pcbddc->switch_static) { /* need to save the local change */ 6995 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6996 } else { 6997 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 6998 } 6999 /* determine if any process has changed the pressures locally */ 7000 pcbddc->change_interior = pcbddc->benign_have_null; 7001 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7002 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7003 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7004 pcbddc->use_qr_single = qr_needed; 7005 } 7006 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7007 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7008 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7009 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7010 } else { 7011 Mat benign_global = NULL; 7012 if (pcbddc->benign_have_null) { 7013 Mat M; 7014 7015 pcbddc->change_interior = PETSC_TRUE; 7016 PetscCall(VecCopy(matis->counter,pcis->vec1_N)); 7017 PetscCall(VecReciprocal(pcis->vec1_N)); 7018 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global)); 7019 if (pcbddc->benign_change) { 7020 PetscCall(MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M)); 7021 PetscCall(MatDiagonalScale(M,pcis->vec1_N,NULL)); 7022 } else { 7023 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M)); 7024 PetscCall(MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES)); 7025 } 7026 PetscCall(MatISSetLocalMat(benign_global,M)); 7027 PetscCall(MatDestroy(&M)); 7028 PetscCall(MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY)); 7029 PetscCall(MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY)); 7030 } 7031 if (pcbddc->user_ChangeOfBasisMatrix) { 7032 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix)); 7033 PetscCall(MatDestroy(&benign_global)); 7034 } else if (pcbddc->benign_have_null) { 7035 pcbddc->ChangeOfBasisMatrix = benign_global; 7036 } 7037 } 7038 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7039 IS is_global; 7040 const PetscInt *gidxs; 7041 7042 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping,&gidxs)); 7043 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global)); 7044 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping,&gidxs)); 7045 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change)); 7046 PetscCall(ISDestroy(&is_global)); 7047 } 7048 } 7049 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7050 PetscCall(VecDuplicate(pcis->vec1_global,&pcbddc->work_change)); 7051 } 7052 7053 if (!pcbddc->fake_change) { 7054 /* add pressure dofs to set of primal nodes for numbering purposes */ 7055 for (i=0;i<pcbddc->benign_n;i++) { 7056 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7057 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7058 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7059 pcbddc->local_primal_size_cc++; 7060 pcbddc->local_primal_size++; 7061 } 7062 7063 /* check if a new primal space has been introduced (also take into account benign trick) */ 7064 pcbddc->new_primal_space_local = PETSC_TRUE; 7065 if (olocal_primal_size == pcbddc->local_primal_size) { 7066 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7067 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7068 if (!pcbddc->new_primal_space_local) { 7069 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7070 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7071 } 7072 } 7073 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7074 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 7075 } 7076 PetscCall(PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult)); 7077 7078 /* flush dbg viewer */ 7079 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7080 7081 /* free workspace */ 7082 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7083 PetscCall(PetscBTDestroy(&change_basis)); 7084 if (!pcbddc->adaptive_selection) { 7085 PetscCall(PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n)); 7086 PetscCall(PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B)); 7087 } else { 7088 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n,pcbddc->adaptive_constraints_idxs_ptr,pcbddc->adaptive_constraints_data_ptr,pcbddc->adaptive_constraints_idxs,pcbddc->adaptive_constraints_data)); 7089 PetscCall(PetscFree(constraints_n)); 7090 PetscCall(PetscFree(constraints_idxs_B)); 7091 } 7092 PetscFunctionReturn(0); 7093 } 7094 7095 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7096 { 7097 ISLocalToGlobalMapping map; 7098 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7099 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7100 PetscInt i,N; 7101 PetscBool rcsr = PETSC_FALSE; 7102 7103 PetscFunctionBegin; 7104 if (pcbddc->recompute_topography) { 7105 pcbddc->graphanalyzed = PETSC_FALSE; 7106 /* Reset previously computed graph */ 7107 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7108 /* Init local Graph struct */ 7109 PetscCall(MatGetSize(pc->pmat,&N,NULL)); 7110 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&map,NULL)); 7111 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount)); 7112 7113 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7114 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local)); 7115 } 7116 /* Check validity of the csr graph passed in by the user */ 7117 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7118 7119 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7120 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7121 PetscInt *xadj,*adjncy; 7122 PetscInt nvtxs; 7123 PetscBool flg_row=PETSC_FALSE; 7124 7125 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7126 if (flg_row) { 7127 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES)); 7128 pcbddc->computed_rowadj = PETSC_TRUE; 7129 } 7130 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7131 rcsr = PETSC_TRUE; 7132 } 7133 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7134 7135 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7136 PetscReal *lcoords; 7137 PetscInt n; 7138 MPI_Datatype dimrealtype; 7139 7140 /* TODO: support for blocked */ 7141 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7142 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7143 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords)); 7144 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype)); 7145 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7146 PetscCall(PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7147 PetscCall(PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7148 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7149 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7150 7151 pcbddc->mat_graph->coords = lcoords; 7152 pcbddc->mat_graph->cloc = PETSC_TRUE; 7153 pcbddc->mat_graph->cnloc = n; 7154 } 7155 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7156 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7157 7158 /* Setup of Graph */ 7159 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7160 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local)); 7161 7162 /* attach info on disconnected subdomains if present */ 7163 if (pcbddc->n_local_subs) { 7164 PetscInt *local_subs,n,totn; 7165 7166 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7167 PetscCall(PetscMalloc1(n,&local_subs)); 7168 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7169 for (i=0;i<pcbddc->n_local_subs;i++) { 7170 const PetscInt *idxs; 7171 PetscInt nl,j; 7172 7173 PetscCall(ISGetLocalSize(pcbddc->local_subs[i],&nl)); 7174 PetscCall(ISGetIndices(pcbddc->local_subs[i],&idxs)); 7175 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7176 PetscCall(ISRestoreIndices(pcbddc->local_subs[i],&idxs)); 7177 } 7178 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7179 pcbddc->mat_graph->n_local_subs = totn + 1; 7180 pcbddc->mat_graph->local_subs = local_subs; 7181 } 7182 } 7183 7184 if (!pcbddc->graphanalyzed) { 7185 /* Graph's connected components analysis */ 7186 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7187 pcbddc->graphanalyzed = PETSC_TRUE; 7188 pcbddc->corner_selected = pcbddc->corner_selection; 7189 } 7190 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7191 PetscFunctionReturn(0); 7192 } 7193 7194 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7195 { 7196 PetscInt i,j,n; 7197 PetscScalar *alphas; 7198 PetscReal norm,*onorms; 7199 7200 PetscFunctionBegin; 7201 n = *nio; 7202 if (!n) PetscFunctionReturn(0); 7203 PetscCall(PetscMalloc2(n,&alphas,n,&onorms)); 7204 PetscCall(VecNormalize(vecs[0],&norm)); 7205 if (norm < PETSC_SMALL) { 7206 onorms[0] = 0.0; 7207 PetscCall(VecSet(vecs[0],0.0)); 7208 } else { 7209 onorms[0] = norm; 7210 } 7211 7212 for (i=1;i<n;i++) { 7213 PetscCall(VecMDot(vecs[i],i,vecs,alphas)); 7214 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7215 PetscCall(VecMAXPY(vecs[i],i,alphas,vecs)); 7216 PetscCall(VecNormalize(vecs[i],&norm)); 7217 if (norm < PETSC_SMALL) { 7218 onorms[i] = 0.0; 7219 PetscCall(VecSet(vecs[i],0.0)); 7220 } else { 7221 onorms[i] = norm; 7222 } 7223 } 7224 /* push nonzero vectors at the beginning */ 7225 for (i=0;i<n;i++) { 7226 if (onorms[i] == 0.0) { 7227 for (j=i+1;j<n;j++) { 7228 if (onorms[j] != 0.0) { 7229 PetscCall(VecCopy(vecs[j],vecs[i])); 7230 onorms[j] = 0.0; 7231 } 7232 } 7233 } 7234 } 7235 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7236 PetscCall(PetscFree2(alphas,onorms)); 7237 PetscFunctionReturn(0); 7238 } 7239 7240 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7241 { 7242 ISLocalToGlobalMapping mapping; 7243 Mat A; 7244 PetscInt n_neighs,*neighs,*n_shared,**shared; 7245 PetscMPIInt size,rank,color; 7246 PetscInt *xadj,*adjncy; 7247 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7248 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7249 PetscInt void_procs,*procs_candidates = NULL; 7250 PetscInt xadj_count,*count; 7251 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7252 PetscSubcomm psubcomm; 7253 MPI_Comm subcomm; 7254 7255 PetscFunctionBegin; 7256 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7257 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7258 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7259 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7260 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7261 PetscCheck(*n_subdomains >0,PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %" PetscInt_FMT,*n_subdomains); 7262 7263 if (have_void) *have_void = PETSC_FALSE; 7264 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size)); 7265 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 7266 PetscCall(MatISGetLocalMat(mat,&A)); 7267 PetscCall(MatGetLocalSize(A,&n,NULL)); 7268 im_active = !!n; 7269 PetscCall(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat))); 7270 void_procs = size - active_procs; 7271 /* get ranks of of non-active processes in mat communicator */ 7272 if (void_procs) { 7273 PetscInt ncand; 7274 7275 if (have_void) *have_void = PETSC_TRUE; 7276 PetscCall(PetscMalloc1(size,&procs_candidates)); 7277 PetscCallMPI(MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 7278 for (i=0,ncand=0;i<size;i++) { 7279 if (!procs_candidates[i]) { 7280 procs_candidates[ncand++] = i; 7281 } 7282 } 7283 /* force n_subdomains to be not greater that the number of non-active processes */ 7284 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7285 } 7286 7287 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7288 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7289 PetscCall(MatGetSize(mat,&N,NULL)); 7290 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7291 PetscInt issize,isidx,dest; 7292 if (*n_subdomains == 1) dest = 0; 7293 else dest = rank; 7294 if (im_active) { 7295 issize = 1; 7296 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7297 isidx = procs_candidates[dest]; 7298 } else { 7299 isidx = dest; 7300 } 7301 } else { 7302 issize = 0; 7303 isidx = -1; 7304 } 7305 if (*n_subdomains != 1) *n_subdomains = active_procs; 7306 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends)); 7307 PetscCall(PetscFree(procs_candidates)); 7308 PetscFunctionReturn(0); 7309 } 7310 PetscCall(PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL)); 7311 PetscCall(PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL)); 7312 threshold = PetscMax(threshold,2); 7313 7314 /* Get info on mapping */ 7315 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7316 PetscCall(ISLocalToGlobalMappingGetInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7317 7318 /* build local CSR graph of subdomains' connectivity */ 7319 PetscCall(PetscMalloc1(2,&xadj)); 7320 xadj[0] = 0; 7321 xadj[1] = PetscMax(n_neighs-1,0); 7322 PetscCall(PetscMalloc1(xadj[1],&adjncy)); 7323 PetscCall(PetscMalloc1(xadj[1],&adjncy_wgt)); 7324 PetscCall(PetscCalloc1(n,&count)); 7325 for (i=1;i<n_neighs;i++) 7326 for (j=0;j<n_shared[i];j++) 7327 count[shared[i][j]] += 1; 7328 7329 xadj_count = 0; 7330 for (i=1;i<n_neighs;i++) { 7331 for (j=0;j<n_shared[i];j++) { 7332 if (count[shared[i][j]] < threshold) { 7333 adjncy[xadj_count] = neighs[i]; 7334 adjncy_wgt[xadj_count] = n_shared[i]; 7335 xadj_count++; 7336 break; 7337 } 7338 } 7339 } 7340 xadj[1] = xadj_count; 7341 PetscCall(PetscFree(count)); 7342 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7343 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7344 7345 PetscCall(PetscMalloc1(1,&ranks_send_to_idx)); 7346 7347 /* Restrict work on active processes only */ 7348 PetscCall(PetscMPIIntCast(im_active,&color)); 7349 if (void_procs) { 7350 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm)); 7351 PetscCall(PetscSubcommSetNumber(psubcomm,2)); /* 2 groups, active process and not active processes */ 7352 PetscCall(PetscSubcommSetTypeGeneral(psubcomm,color,rank)); 7353 subcomm = PetscSubcommChild(psubcomm); 7354 } else { 7355 psubcomm = NULL; 7356 subcomm = PetscObjectComm((PetscObject)mat); 7357 } 7358 7359 v_wgt = NULL; 7360 if (!color) { 7361 PetscCall(PetscFree(xadj)); 7362 PetscCall(PetscFree(adjncy)); 7363 PetscCall(PetscFree(adjncy_wgt)); 7364 } else { 7365 Mat subdomain_adj; 7366 IS new_ranks,new_ranks_contig; 7367 MatPartitioning partitioner; 7368 PetscInt rstart=0,rend=0; 7369 PetscInt *is_indices,*oldranks; 7370 PetscMPIInt size; 7371 PetscBool aggregate; 7372 7373 PetscCallMPI(MPI_Comm_size(subcomm,&size)); 7374 if (void_procs) { 7375 PetscInt prank = rank; 7376 PetscCall(PetscMalloc1(size,&oldranks)); 7377 PetscCallMPI(MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm)); 7378 for (i=0;i<xadj[1];i++) { 7379 PetscCall(PetscFindInt(adjncy[i],size,oldranks,&adjncy[i])); 7380 } 7381 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7382 } else { 7383 oldranks = NULL; 7384 } 7385 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7386 if (aggregate) { /* TODO: all this part could be made more efficient */ 7387 PetscInt lrows,row,ncols,*cols; 7388 PetscMPIInt nrank; 7389 PetscScalar *vals; 7390 7391 PetscCallMPI(MPI_Comm_rank(subcomm,&nrank)); 7392 lrows = 0; 7393 if (nrank<redprocs) { 7394 lrows = size/redprocs; 7395 if (nrank<size%redprocs) lrows++; 7396 } 7397 PetscCall(MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj)); 7398 PetscCall(MatGetOwnershipRange(subdomain_adj,&rstart,&rend)); 7399 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 7400 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 7401 row = nrank; 7402 ncols = xadj[1]-xadj[0]; 7403 cols = adjncy; 7404 PetscCall(PetscMalloc1(ncols,&vals)); 7405 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7406 PetscCall(MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES)); 7407 PetscCall(MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7408 PetscCall(MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7409 PetscCall(PetscFree(xadj)); 7410 PetscCall(PetscFree(adjncy)); 7411 PetscCall(PetscFree(adjncy_wgt)); 7412 PetscCall(PetscFree(vals)); 7413 if (use_vwgt) { 7414 Vec v; 7415 const PetscScalar *array; 7416 PetscInt nl; 7417 7418 PetscCall(MatCreateVecs(subdomain_adj,&v,NULL)); 7419 PetscCall(VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES)); 7420 PetscCall(VecAssemblyBegin(v)); 7421 PetscCall(VecAssemblyEnd(v)); 7422 PetscCall(VecGetLocalSize(v,&nl)); 7423 PetscCall(VecGetArrayRead(v,&array)); 7424 PetscCall(PetscMalloc1(nl,&v_wgt)); 7425 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7426 PetscCall(VecRestoreArrayRead(v,&array)); 7427 PetscCall(VecDestroy(&v)); 7428 } 7429 } else { 7430 PetscCall(MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj)); 7431 if (use_vwgt) { 7432 PetscCall(PetscMalloc1(1,&v_wgt)); 7433 v_wgt[0] = n; 7434 } 7435 } 7436 /* PetscCall(MatView(subdomain_adj,0)); */ 7437 7438 /* Partition */ 7439 PetscCall(MatPartitioningCreate(subcomm,&partitioner)); 7440 #if defined(PETSC_HAVE_PTSCOTCH) 7441 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH)); 7442 #elif defined(PETSC_HAVE_PARMETIS) 7443 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS)); 7444 #else 7445 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE)); 7446 #endif 7447 PetscCall(MatPartitioningSetAdjacency(partitioner,subdomain_adj)); 7448 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner,v_wgt)); 7449 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7450 PetscCall(MatPartitioningSetNParts(partitioner,*n_subdomains)); 7451 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7452 PetscCall(MatPartitioningApply(partitioner,&new_ranks)); 7453 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7454 7455 /* renumber new_ranks to avoid "holes" in new set of processors */ 7456 PetscCall(ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig)); 7457 PetscCall(ISDestroy(&new_ranks)); 7458 PetscCall(ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7459 if (!aggregate) { 7460 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7461 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7462 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7463 } else if (oldranks) { 7464 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7465 } else { 7466 ranks_send_to_idx[0] = is_indices[0]; 7467 } 7468 } else { 7469 PetscInt idx = 0; 7470 PetscMPIInt tag; 7471 MPI_Request *reqs; 7472 7473 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag)); 7474 PetscCall(PetscMalloc1(rend-rstart,&reqs)); 7475 for (i=rstart;i<rend;i++) { 7476 PetscCallMPI(MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart])); 7477 } 7478 PetscCallMPI(MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE)); 7479 PetscCallMPI(MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE)); 7480 PetscCall(PetscFree(reqs)); 7481 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7482 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7483 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7484 } else if (oldranks) { 7485 ranks_send_to_idx[0] = oldranks[idx]; 7486 } else { 7487 ranks_send_to_idx[0] = idx; 7488 } 7489 } 7490 PetscCall(ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7491 /* clean up */ 7492 PetscCall(PetscFree(oldranks)); 7493 PetscCall(ISDestroy(&new_ranks_contig)); 7494 PetscCall(MatDestroy(&subdomain_adj)); 7495 PetscCall(MatPartitioningDestroy(&partitioner)); 7496 } 7497 PetscCall(PetscSubcommDestroy(&psubcomm)); 7498 PetscCall(PetscFree(procs_candidates)); 7499 7500 /* assemble parallel IS for sends */ 7501 i = 1; 7502 if (!color) i=0; 7503 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends)); 7504 PetscFunctionReturn(0); 7505 } 7506 7507 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7508 7509 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7510 { 7511 Mat local_mat; 7512 IS is_sends_internal; 7513 PetscInt rows,cols,new_local_rows; 7514 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7515 PetscBool ismatis,isdense,newisdense,destroy_mat; 7516 ISLocalToGlobalMapping l2gmap; 7517 PetscInt* l2gmap_indices; 7518 const PetscInt* is_indices; 7519 MatType new_local_type; 7520 /* buffers */ 7521 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7522 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7523 PetscInt *recv_buffer_idxs_local; 7524 PetscScalar *ptr_vals,*recv_buffer_vals; 7525 const PetscScalar *send_buffer_vals; 7526 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7527 /* MPI */ 7528 MPI_Comm comm,comm_n; 7529 PetscSubcomm subcomm; 7530 PetscMPIInt n_sends,n_recvs,size; 7531 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7532 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7533 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7534 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7535 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7536 7537 PetscFunctionBegin; 7538 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7539 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7540 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7541 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7542 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7543 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7544 PetscValidLogicalCollectiveBool(mat,reuse,6); 7545 PetscValidLogicalCollectiveInt(mat,nis,8); 7546 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7547 if (nvecs) { 7548 PetscCheck(nvecs <= 1,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7549 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7550 } 7551 /* further checks */ 7552 PetscCall(MatISGetLocalMat(mat,&local_mat)); 7553 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense)); 7554 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7555 PetscCall(MatGetSize(local_mat,&rows,&cols)); 7556 PetscCheck(rows == cols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7557 if (reuse && *mat_n) { 7558 PetscInt mrows,mcols,mnrows,mncols; 7559 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7560 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis)); 7561 PetscCheck(ismatis,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7562 PetscCall(MatGetSize(mat,&mrows,&mcols)); 7563 PetscCall(MatGetSize(*mat_n,&mnrows,&mncols)); 7564 PetscCheck(mrows == mnrows,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT,mrows,mnrows); 7565 PetscCheck(mcols == mncols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT,mcols,mncols); 7566 } 7567 PetscCall(MatGetBlockSize(local_mat,&bs)); 7568 PetscValidLogicalCollectiveInt(mat,bs,1); 7569 7570 /* prepare IS for sending if not provided */ 7571 if (!is_sends) { 7572 PetscCheck(n_subdomains,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7573 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL)); 7574 } else { 7575 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7576 is_sends_internal = is_sends; 7577 } 7578 7579 /* get comm */ 7580 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 7581 7582 /* compute number of sends */ 7583 PetscCall(ISGetLocalSize(is_sends_internal,&i)); 7584 PetscCall(PetscMPIIntCast(i,&n_sends)); 7585 7586 /* compute number of receives */ 7587 PetscCallMPI(MPI_Comm_size(comm,&size)); 7588 PetscCall(PetscMalloc1(size,&iflags)); 7589 PetscCall(PetscArrayzero(iflags,size)); 7590 PetscCall(ISGetIndices(is_sends_internal,&is_indices)); 7591 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7592 PetscCall(PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs)); 7593 PetscCall(PetscFree(iflags)); 7594 7595 /* restrict comm if requested */ 7596 subcomm = NULL; 7597 destroy_mat = PETSC_FALSE; 7598 if (restrict_comm) { 7599 PetscMPIInt color,subcommsize; 7600 7601 color = 0; 7602 if (restrict_full) { 7603 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7604 } else { 7605 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7606 } 7607 PetscCall(MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm)); 7608 subcommsize = size - subcommsize; 7609 /* check if reuse has been requested */ 7610 if (reuse) { 7611 if (*mat_n) { 7612 PetscMPIInt subcommsize2; 7613 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2)); 7614 PetscCheck(subcommsize == subcommsize2,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7615 comm_n = PetscObjectComm((PetscObject)*mat_n); 7616 } else { 7617 comm_n = PETSC_COMM_SELF; 7618 } 7619 } else { /* MAT_INITIAL_MATRIX */ 7620 PetscMPIInt rank; 7621 7622 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 7623 PetscCall(PetscSubcommCreate(comm,&subcomm)); 7624 PetscCall(PetscSubcommSetNumber(subcomm,2)); 7625 PetscCall(PetscSubcommSetTypeGeneral(subcomm,color,rank)); 7626 comm_n = PetscSubcommChild(subcomm); 7627 } 7628 /* flag to destroy *mat_n if not significative */ 7629 if (color) destroy_mat = PETSC_TRUE; 7630 } else { 7631 comm_n = comm; 7632 } 7633 7634 /* prepare send/receive buffers */ 7635 PetscCall(PetscMalloc1(size,&ilengths_idxs)); 7636 PetscCall(PetscArrayzero(ilengths_idxs,size)); 7637 PetscCall(PetscMalloc1(size,&ilengths_vals)); 7638 PetscCall(PetscArrayzero(ilengths_vals,size)); 7639 if (nis) { 7640 PetscCall(PetscCalloc1(size,&ilengths_idxs_is)); 7641 } 7642 7643 /* Get data from local matrices */ 7644 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7645 /* TODO: See below some guidelines on how to prepare the local buffers */ 7646 /* 7647 send_buffer_vals should contain the raw values of the local matrix 7648 send_buffer_idxs should contain: 7649 - MatType_PRIVATE type 7650 - PetscInt size_of_l2gmap 7651 - PetscInt global_row_indices[size_of_l2gmap] 7652 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7653 */ 7654 { 7655 ISLocalToGlobalMapping mapping; 7656 7657 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7658 PetscCall(MatDenseGetArrayRead(local_mat,&send_buffer_vals)); 7659 PetscCall(ISLocalToGlobalMappingGetSize(mapping,&i)); 7660 PetscCall(PetscMalloc1(i+2,&send_buffer_idxs)); 7661 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7662 send_buffer_idxs[1] = i; 7663 PetscCall(ISLocalToGlobalMappingGetIndices(mapping,(const PetscInt**)&ptr_idxs)); 7664 PetscCall(PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i)); 7665 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping,(const PetscInt**)&ptr_idxs)); 7666 PetscCall(PetscMPIIntCast(i,&len)); 7667 for (i=0;i<n_sends;i++) { 7668 ilengths_vals[is_indices[i]] = len*len; 7669 ilengths_idxs[is_indices[i]] = len+2; 7670 } 7671 } 7672 PetscCall(PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals)); 7673 /* additional is (if any) */ 7674 if (nis) { 7675 PetscMPIInt psum; 7676 PetscInt j; 7677 for (j=0,psum=0;j<nis;j++) { 7678 PetscInt plen; 7679 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7680 PetscCall(PetscMPIIntCast(plen,&len)); 7681 psum += len+1; /* indices + length */ 7682 } 7683 PetscCall(PetscMalloc1(psum,&send_buffer_idxs_is)); 7684 for (j=0,psum=0;j<nis;j++) { 7685 PetscInt plen; 7686 const PetscInt *is_array_idxs; 7687 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7688 send_buffer_idxs_is[psum] = plen; 7689 PetscCall(ISGetIndices(isarray[j],&is_array_idxs)); 7690 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen)); 7691 PetscCall(ISRestoreIndices(isarray[j],&is_array_idxs)); 7692 psum += plen+1; /* indices + length */ 7693 } 7694 for (i=0;i<n_sends;i++) { 7695 ilengths_idxs_is[is_indices[i]] = psum; 7696 } 7697 PetscCall(PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is)); 7698 } 7699 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 7700 7701 buf_size_idxs = 0; 7702 buf_size_vals = 0; 7703 buf_size_idxs_is = 0; 7704 buf_size_vecs = 0; 7705 for (i=0;i<n_recvs;i++) { 7706 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7707 buf_size_vals += (PetscInt)olengths_vals[i]; 7708 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7709 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7710 } 7711 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs)); 7712 PetscCall(PetscMalloc1(buf_size_vals,&recv_buffer_vals)); 7713 PetscCall(PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is)); 7714 PetscCall(PetscMalloc1(buf_size_vecs,&recv_buffer_vecs)); 7715 7716 /* get new tags for clean communications */ 7717 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs)); 7718 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vals)); 7719 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is)); 7720 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vecs)); 7721 7722 /* allocate for requests */ 7723 PetscCall(PetscMalloc1(n_sends,&send_req_idxs)); 7724 PetscCall(PetscMalloc1(n_sends,&send_req_vals)); 7725 PetscCall(PetscMalloc1(n_sends,&send_req_idxs_is)); 7726 PetscCall(PetscMalloc1(n_sends,&send_req_vecs)); 7727 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs)); 7728 PetscCall(PetscMalloc1(n_recvs,&recv_req_vals)); 7729 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs_is)); 7730 PetscCall(PetscMalloc1(n_recvs,&recv_req_vecs)); 7731 7732 /* communications */ 7733 ptr_idxs = recv_buffer_idxs; 7734 ptr_vals = recv_buffer_vals; 7735 ptr_idxs_is = recv_buffer_idxs_is; 7736 ptr_vecs = recv_buffer_vecs; 7737 for (i=0;i<n_recvs;i++) { 7738 source_dest = onodes[i]; 7739 PetscCallMPI(MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i])); 7740 PetscCallMPI(MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i])); 7741 ptr_idxs += olengths_idxs[i]; 7742 ptr_vals += olengths_vals[i]; 7743 if (nis) { 7744 source_dest = onodes_is[i]; 7745 PetscCallMPI(MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i])); 7746 ptr_idxs_is += olengths_idxs_is[i]; 7747 } 7748 if (nvecs) { 7749 source_dest = onodes[i]; 7750 PetscCallMPI(MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i])); 7751 ptr_vecs += olengths_idxs[i]-2; 7752 } 7753 } 7754 for (i=0;i<n_sends;i++) { 7755 PetscCall(PetscMPIIntCast(is_indices[i],&source_dest)); 7756 PetscCallMPI(MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i])); 7757 PetscCallMPI(MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i])); 7758 if (nis) { 7759 PetscCallMPI(MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i])); 7760 } 7761 if (nvecs) { 7762 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 7763 PetscCallMPI(MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i])); 7764 } 7765 } 7766 PetscCall(ISRestoreIndices(is_sends_internal,&is_indices)); 7767 PetscCall(ISDestroy(&is_sends_internal)); 7768 7769 /* assemble new l2g map */ 7770 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE)); 7771 ptr_idxs = recv_buffer_idxs; 7772 new_local_rows = 0; 7773 for (i=0;i<n_recvs;i++) { 7774 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7775 ptr_idxs += olengths_idxs[i]; 7776 } 7777 PetscCall(PetscMalloc1(new_local_rows,&l2gmap_indices)); 7778 ptr_idxs = recv_buffer_idxs; 7779 new_local_rows = 0; 7780 for (i=0;i<n_recvs;i++) { 7781 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1))); 7782 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7783 ptr_idxs += olengths_idxs[i]; 7784 } 7785 PetscCall(PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices)); 7786 PetscCall(ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap)); 7787 PetscCall(PetscFree(l2gmap_indices)); 7788 7789 /* infer new local matrix type from received local matrices type */ 7790 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7791 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7792 if (n_recvs) { 7793 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7794 ptr_idxs = recv_buffer_idxs; 7795 for (i=0;i<n_recvs;i++) { 7796 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7797 new_local_type_private = MATAIJ_PRIVATE; 7798 break; 7799 } 7800 ptr_idxs += olengths_idxs[i]; 7801 } 7802 switch (new_local_type_private) { 7803 case MATDENSE_PRIVATE: 7804 new_local_type = MATSEQAIJ; 7805 bs = 1; 7806 break; 7807 case MATAIJ_PRIVATE: 7808 new_local_type = MATSEQAIJ; 7809 bs = 1; 7810 break; 7811 case MATBAIJ_PRIVATE: 7812 new_local_type = MATSEQBAIJ; 7813 break; 7814 case MATSBAIJ_PRIVATE: 7815 new_local_type = MATSEQSBAIJ; 7816 break; 7817 default: 7818 SETERRQ(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7819 } 7820 } else { /* by default, new_local_type is seqaij */ 7821 new_local_type = MATSEQAIJ; 7822 bs = 1; 7823 } 7824 7825 /* create MATIS object if needed */ 7826 if (!reuse) { 7827 PetscCall(MatGetSize(mat,&rows,&cols)); 7828 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7829 } else { 7830 /* it also destroys the local matrices */ 7831 if (*mat_n) { 7832 PetscCall(MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap)); 7833 } else { /* this is a fake object */ 7834 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7835 } 7836 } 7837 PetscCall(MatISGetLocalMat(*mat_n,&local_mat)); 7838 PetscCall(MatSetType(local_mat,new_local_type)); 7839 7840 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE)); 7841 7842 /* Global to local map of received indices */ 7843 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local)); /* needed for values insertion */ 7844 PetscCall(ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local)); 7845 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 7846 7847 /* restore attributes -> type of incoming data and its size */ 7848 buf_size_idxs = 0; 7849 for (i=0;i<n_recvs;i++) { 7850 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7851 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7852 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7853 } 7854 PetscCall(PetscFree(recv_buffer_idxs)); 7855 7856 /* set preallocation */ 7857 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense)); 7858 if (!newisdense) { 7859 PetscInt *new_local_nnz=NULL; 7860 7861 ptr_idxs = recv_buffer_idxs_local; 7862 if (n_recvs) { 7863 PetscCall(PetscCalloc1(new_local_rows,&new_local_nnz)); 7864 } 7865 for (i=0;i<n_recvs;i++) { 7866 PetscInt j; 7867 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7868 for (j=0;j<*(ptr_idxs+1);j++) { 7869 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7870 } 7871 } else { 7872 /* TODO */ 7873 } 7874 ptr_idxs += olengths_idxs[i]; 7875 } 7876 if (new_local_nnz) { 7877 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7878 PetscCall(MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz)); 7879 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7880 PetscCall(MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7881 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7882 PetscCall(MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7883 } else { 7884 PetscCall(MatSetUp(local_mat)); 7885 } 7886 PetscCall(PetscFree(new_local_nnz)); 7887 } else { 7888 PetscCall(MatSetUp(local_mat)); 7889 } 7890 7891 /* set values */ 7892 ptr_vals = recv_buffer_vals; 7893 ptr_idxs = recv_buffer_idxs_local; 7894 for (i=0;i<n_recvs;i++) { 7895 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7896 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE)); 7897 PetscCall(MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES)); 7898 PetscCall(MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY)); 7899 PetscCall(MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY)); 7900 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE)); 7901 } else { 7902 /* TODO */ 7903 } 7904 ptr_idxs += olengths_idxs[i]; 7905 ptr_vals += olengths_vals[i]; 7906 } 7907 PetscCall(MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY)); 7908 PetscCall(MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY)); 7909 PetscCall(MatISRestoreLocalMat(*mat_n,&local_mat)); 7910 PetscCall(MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY)); 7911 PetscCall(MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY)); 7912 PetscCall(PetscFree(recv_buffer_vals)); 7913 7914 #if 0 7915 if (!restrict_comm) { /* check */ 7916 Vec lvec,rvec; 7917 PetscReal infty_error; 7918 7919 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 7920 PetscCall(VecSetRandom(rvec,NULL)); 7921 PetscCall(MatMult(mat,rvec,lvec)); 7922 PetscCall(VecScale(lvec,-1.0)); 7923 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7924 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7925 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7926 PetscCall(VecDestroy(&rvec)); 7927 PetscCall(VecDestroy(&lvec)); 7928 } 7929 #endif 7930 7931 /* assemble new additional is (if any) */ 7932 if (nis) { 7933 PetscInt **temp_idxs,*count_is,j,psum; 7934 7935 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE)); 7936 PetscCall(PetscCalloc1(nis,&count_is)); 7937 ptr_idxs = recv_buffer_idxs_is; 7938 psum = 0; 7939 for (i=0;i<n_recvs;i++) { 7940 for (j=0;j<nis;j++) { 7941 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7942 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7943 psum += plen; 7944 ptr_idxs += plen+1; /* shift pointer to received data */ 7945 } 7946 } 7947 PetscCall(PetscMalloc1(nis,&temp_idxs)); 7948 PetscCall(PetscMalloc1(psum,&temp_idxs[0])); 7949 for (i=1;i<nis;i++) { 7950 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7951 } 7952 PetscCall(PetscArrayzero(count_is,nis)); 7953 ptr_idxs = recv_buffer_idxs_is; 7954 for (i=0;i<n_recvs;i++) { 7955 for (j=0;j<nis;j++) { 7956 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7957 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen)); 7958 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7959 ptr_idxs += plen+1; /* shift pointer to received data */ 7960 } 7961 } 7962 for (i=0;i<nis;i++) { 7963 PetscCall(ISDestroy(&isarray[i])); 7964 PetscCall(PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i])); 7965 PetscCall(ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i])); 7966 } 7967 PetscCall(PetscFree(count_is)); 7968 PetscCall(PetscFree(temp_idxs[0])); 7969 PetscCall(PetscFree(temp_idxs)); 7970 } 7971 /* free workspace */ 7972 PetscCall(PetscFree(recv_buffer_idxs_is)); 7973 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE)); 7974 PetscCall(PetscFree(send_buffer_idxs)); 7975 PetscCallMPI(MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE)); 7976 if (isdense) { 7977 PetscCall(MatISGetLocalMat(mat,&local_mat)); 7978 PetscCall(MatDenseRestoreArrayRead(local_mat,&send_buffer_vals)); 7979 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 7980 } else { 7981 /* PetscCall(PetscFree(send_buffer_vals)); */ 7982 } 7983 if (nis) { 7984 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE)); 7985 PetscCall(PetscFree(send_buffer_idxs_is)); 7986 } 7987 7988 if (nvecs) { 7989 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE)); 7990 PetscCallMPI(MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE)); 7991 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 7992 PetscCall(VecDestroy(&nnsp_vec[0])); 7993 PetscCall(VecCreate(comm_n,&nnsp_vec[0])); 7994 PetscCall(VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE)); 7995 PetscCall(VecSetType(nnsp_vec[0],VECSTANDARD)); 7996 /* set values */ 7997 ptr_vals = recv_buffer_vecs; 7998 ptr_idxs = recv_buffer_idxs_local; 7999 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 8000 for (i=0;i<n_recvs;i++) { 8001 PetscInt j; 8002 for (j=0;j<*(ptr_idxs+1);j++) { 8003 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8004 } 8005 ptr_idxs += olengths_idxs[i]; 8006 ptr_vals += olengths_idxs[i]-2; 8007 } 8008 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 8009 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8010 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8011 } 8012 8013 PetscCall(PetscFree(recv_buffer_vecs)); 8014 PetscCall(PetscFree(recv_buffer_idxs_local)); 8015 PetscCall(PetscFree(recv_req_idxs)); 8016 PetscCall(PetscFree(recv_req_vals)); 8017 PetscCall(PetscFree(recv_req_vecs)); 8018 PetscCall(PetscFree(recv_req_idxs_is)); 8019 PetscCall(PetscFree(send_req_idxs)); 8020 PetscCall(PetscFree(send_req_vals)); 8021 PetscCall(PetscFree(send_req_vecs)); 8022 PetscCall(PetscFree(send_req_idxs_is)); 8023 PetscCall(PetscFree(ilengths_vals)); 8024 PetscCall(PetscFree(ilengths_idxs)); 8025 PetscCall(PetscFree(olengths_vals)); 8026 PetscCall(PetscFree(olengths_idxs)); 8027 PetscCall(PetscFree(onodes)); 8028 if (nis) { 8029 PetscCall(PetscFree(ilengths_idxs_is)); 8030 PetscCall(PetscFree(olengths_idxs_is)); 8031 PetscCall(PetscFree(onodes_is)); 8032 } 8033 PetscCall(PetscSubcommDestroy(&subcomm)); 8034 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8035 PetscCall(MatDestroy(mat_n)); 8036 for (i=0;i<nis;i++) { 8037 PetscCall(ISDestroy(&isarray[i])); 8038 } 8039 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8040 PetscCall(VecDestroy(&nnsp_vec[0])); 8041 } 8042 *mat_n = NULL; 8043 } 8044 PetscFunctionReturn(0); 8045 } 8046 8047 /* temporary hack into ksp private data structure */ 8048 #include <petsc/private/kspimpl.h> 8049 8050 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8051 { 8052 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8053 PC_IS *pcis = (PC_IS*)pc->data; 8054 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8055 Mat coarsedivudotp = NULL; 8056 Mat coarseG,t_coarse_mat_is; 8057 MatNullSpace CoarseNullSpace = NULL; 8058 ISLocalToGlobalMapping coarse_islg; 8059 IS coarse_is,*isarray,corners; 8060 PetscInt i,im_active=-1,active_procs=-1; 8061 PetscInt nis,nisdofs,nisneu,nisvert; 8062 PetscInt coarse_eqs_per_proc; 8063 PC pc_temp; 8064 PCType coarse_pc_type; 8065 KSPType coarse_ksp_type; 8066 PetscBool multilevel_requested,multilevel_allowed; 8067 PetscBool coarse_reuse; 8068 PetscInt ncoarse,nedcfield; 8069 PetscBool compute_vecs = PETSC_FALSE; 8070 PetscScalar *array; 8071 MatReuse coarse_mat_reuse; 8072 PetscBool restr, full_restr, have_void; 8073 PetscMPIInt size; 8074 8075 PetscFunctionBegin; 8076 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8077 /* Assign global numbering to coarse dofs */ 8078 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8079 PetscInt ocoarse_size; 8080 compute_vecs = PETSC_TRUE; 8081 8082 pcbddc->new_primal_space = PETSC_TRUE; 8083 ocoarse_size = pcbddc->coarse_size; 8084 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8085 PetscCall(PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices)); 8086 /* see if we can avoid some work */ 8087 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8088 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8089 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8090 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8091 coarse_reuse = PETSC_FALSE; 8092 } else { /* we can safely reuse already computed coarse matrix */ 8093 coarse_reuse = PETSC_TRUE; 8094 } 8095 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8096 coarse_reuse = PETSC_FALSE; 8097 } 8098 /* reset any subassembling information */ 8099 if (!coarse_reuse || pcbddc->recompute_topography) { 8100 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8101 } 8102 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8103 coarse_reuse = PETSC_TRUE; 8104 } 8105 if (coarse_reuse && pcbddc->coarse_ksp) { 8106 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 8107 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8108 coarse_mat_reuse = MAT_REUSE_MATRIX; 8109 } else { 8110 coarse_mat = NULL; 8111 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8112 } 8113 8114 /* creates temporary l2gmap and IS for coarse indexes */ 8115 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is)); 8116 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg)); 8117 8118 /* creates temporary MATIS object for coarse matrix */ 8119 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense)); 8120 PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,coarse_islg,&t_coarse_mat_is)); 8121 PetscCall(MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense)); 8122 PetscCall(MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8123 PetscCall(MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8124 PetscCall(MatDestroy(&coarse_submat_dense)); 8125 8126 /* count "active" (i.e. with positive local size) and "void" processes */ 8127 im_active = !!(pcis->n); 8128 PetscCall(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8129 8130 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8131 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8132 /* full_restr : just use the receivers from the subassembling pattern */ 8133 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size)); 8134 coarse_mat_is = NULL; 8135 multilevel_allowed = PETSC_FALSE; 8136 multilevel_requested = PETSC_FALSE; 8137 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8138 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8139 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8140 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8141 if (multilevel_requested) { 8142 ncoarse = active_procs/pcbddc->coarsening_ratio; 8143 restr = PETSC_FALSE; 8144 full_restr = PETSC_FALSE; 8145 } else { 8146 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8147 restr = PETSC_TRUE; 8148 full_restr = PETSC_TRUE; 8149 } 8150 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8151 ncoarse = PetscMax(1,ncoarse); 8152 if (!pcbddc->coarse_subassembling) { 8153 if (pcbddc->coarsening_ratio > 1) { 8154 if (multilevel_requested) { 8155 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8156 } else { 8157 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8158 } 8159 } else { 8160 PetscMPIInt rank; 8161 8162 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank)); 8163 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8164 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling)); 8165 } 8166 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8167 PetscInt psum; 8168 if (pcbddc->coarse_ksp) psum = 1; 8169 else psum = 0; 8170 PetscCall(MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8171 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8172 } 8173 /* determine if we can go multilevel */ 8174 if (multilevel_requested) { 8175 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8176 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8177 } 8178 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8179 8180 /* dump subassembling pattern */ 8181 if (pcbddc->dbg_flag && multilevel_allowed) { 8182 PetscCall(ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer)); 8183 } 8184 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8185 nedcfield = -1; 8186 corners = NULL; 8187 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8188 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8189 const PetscInt *idxs; 8190 ISLocalToGlobalMapping tmap; 8191 8192 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8193 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap)); 8194 /* allocate space for temporary storage */ 8195 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs)); 8196 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs2)); 8197 /* allocate for IS array */ 8198 nisdofs = pcbddc->n_ISForDofsLocal; 8199 if (pcbddc->nedclocal) { 8200 if (pcbddc->nedfield > -1) { 8201 nedcfield = pcbddc->nedfield; 8202 } else { 8203 nedcfield = 0; 8204 PetscCheck(!nisdofs,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%" PetscInt_FMT ")",nisdofs); 8205 nisdofs = 1; 8206 } 8207 } 8208 nisneu = !!pcbddc->NeumannBoundariesLocal; 8209 nisvert = 0; /* nisvert is not used */ 8210 nis = nisdofs + nisneu + nisvert; 8211 PetscCall(PetscMalloc1(nis,&isarray)); 8212 /* dofs splitting */ 8213 for (i=0;i<nisdofs;i++) { 8214 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8215 if (nedcfield != i) { 8216 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize)); 8217 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8218 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8219 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8220 } else { 8221 PetscCall(ISGetLocalSize(pcbddc->nedclocal,&tsize)); 8222 PetscCall(ISGetIndices(pcbddc->nedclocal,&idxs)); 8223 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8224 PetscCheck(tsize == nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT,tsize,nout); 8225 PetscCall(ISRestoreIndices(pcbddc->nedclocal,&idxs)); 8226 } 8227 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8228 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i])); 8229 /* PetscCall(ISView(isarray[i],0)); */ 8230 } 8231 /* neumann boundaries */ 8232 if (pcbddc->NeumannBoundariesLocal) { 8233 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8234 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize)); 8235 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8236 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8237 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8238 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8239 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs])); 8240 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8241 } 8242 /* coordinates */ 8243 if (pcbddc->corner_selected) { 8244 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8245 PetscCall(ISGetLocalSize(corners,&tsize)); 8246 PetscCall(ISGetIndices(corners,&idxs)); 8247 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8248 PetscCheck(tsize == nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT,tsize,nout); 8249 PetscCall(ISRestoreIndices(corners,&idxs)); 8250 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8251 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8252 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners)); 8253 } 8254 PetscCall(PetscFree(tidxs)); 8255 PetscCall(PetscFree(tidxs2)); 8256 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8257 } else { 8258 nis = 0; 8259 nisdofs = 0; 8260 nisneu = 0; 8261 nisvert = 0; 8262 isarray = NULL; 8263 } 8264 /* destroy no longer needed map */ 8265 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8266 8267 /* subassemble */ 8268 if (multilevel_allowed) { 8269 Vec vp[1]; 8270 PetscInt nvecs = 0; 8271 PetscBool reuse,reuser; 8272 8273 if (coarse_mat) reuse = PETSC_TRUE; 8274 else reuse = PETSC_FALSE; 8275 PetscCall(MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8276 vp[0] = NULL; 8277 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8278 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&vp[0])); 8279 PetscCall(VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE)); 8280 PetscCall(VecSetType(vp[0],VECSTANDARD)); 8281 nvecs = 1; 8282 8283 if (pcbddc->divudotp) { 8284 Mat B,loc_divudotp; 8285 Vec v,p; 8286 IS dummy; 8287 PetscInt np; 8288 8289 PetscCall(MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp)); 8290 PetscCall(MatGetSize(loc_divudotp,&np,NULL)); 8291 PetscCall(ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy)); 8292 PetscCall(MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B)); 8293 PetscCall(MatCreateVecs(B,&v,&p)); 8294 PetscCall(VecSet(p,1.)); 8295 PetscCall(MatMultTranspose(B,p,v)); 8296 PetscCall(VecDestroy(&p)); 8297 PetscCall(MatDestroy(&B)); 8298 PetscCall(VecGetArray(vp[0],&array)); 8299 PetscCall(VecPlaceArray(pcbddc->vec1_P,array)); 8300 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P)); 8301 PetscCall(VecResetArray(pcbddc->vec1_P)); 8302 PetscCall(VecRestoreArray(vp[0],&array)); 8303 PetscCall(ISDestroy(&dummy)); 8304 PetscCall(VecDestroy(&v)); 8305 } 8306 } 8307 if (reuser) { 8308 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp)); 8309 } else { 8310 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp)); 8311 } 8312 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8313 PetscScalar *arraym; 8314 const PetscScalar *arrayv; 8315 PetscInt nl; 8316 PetscCall(VecGetLocalSize(vp[0],&nl)); 8317 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp)); 8318 PetscCall(MatDenseGetArray(coarsedivudotp,&arraym)); 8319 PetscCall(VecGetArrayRead(vp[0],&arrayv)); 8320 PetscCall(PetscArraycpy(arraym,arrayv,nl)); 8321 PetscCall(VecRestoreArrayRead(vp[0],&arrayv)); 8322 PetscCall(MatDenseRestoreArray(coarsedivudotp,&arraym)); 8323 PetscCall(VecDestroy(&vp[0])); 8324 } else { 8325 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp)); 8326 } 8327 } else { 8328 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL)); 8329 } 8330 if (coarse_mat_is || coarse_mat) { 8331 if (!multilevel_allowed) { 8332 PetscCall(MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat)); 8333 } else { 8334 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8335 if (coarse_mat_is) { 8336 PetscCheck(!coarse_mat,PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8337 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8338 coarse_mat = coarse_mat_is; 8339 } 8340 } 8341 } 8342 PetscCall(MatDestroy(&t_coarse_mat_is)); 8343 PetscCall(MatDestroy(&coarse_mat_is)); 8344 8345 /* create local to global scatters for coarse problem */ 8346 if (compute_vecs) { 8347 PetscInt lrows; 8348 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8349 if (coarse_mat) { 8350 PetscCall(MatGetLocalSize(coarse_mat,&lrows,NULL)); 8351 } else { 8352 lrows = 0; 8353 } 8354 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec)); 8355 PetscCall(VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE)); 8356 PetscCall(VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8357 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8358 PetscCall(VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob)); 8359 } 8360 PetscCall(ISDestroy(&coarse_is)); 8361 8362 /* set defaults for coarse KSP and PC */ 8363 if (multilevel_allowed) { 8364 coarse_ksp_type = KSPRICHARDSON; 8365 coarse_pc_type = PCBDDC; 8366 } else { 8367 coarse_ksp_type = KSPPREONLY; 8368 coarse_pc_type = PCREDUNDANT; 8369 } 8370 8371 /* print some info if requested */ 8372 if (pcbddc->dbg_flag) { 8373 if (!multilevel_allowed) { 8374 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8375 if (multilevel_requested) { 8376 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio)); 8377 } else if (pcbddc->max_levels) { 8378 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%" PetscInt_FMT ")\n",pcbddc->max_levels)); 8379 } 8380 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8381 } 8382 } 8383 8384 /* communicate coarse discrete gradient */ 8385 coarseG = NULL; 8386 if (pcbddc->nedcG && multilevel_allowed) { 8387 MPI_Comm ccomm; 8388 if (coarse_mat) { 8389 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8390 } else { 8391 ccomm = MPI_COMM_NULL; 8392 } 8393 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG)); 8394 } 8395 8396 /* create the coarse KSP object only once with defaults */ 8397 if (coarse_mat) { 8398 PetscBool isredundant,isbddc,force,valid; 8399 PetscViewer dbg_viewer = NULL; 8400 8401 if (pcbddc->dbg_flag) { 8402 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8403 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level)); 8404 } 8405 if (!pcbddc->coarse_ksp) { 8406 char prefix[256],str_level[16]; 8407 size_t len; 8408 8409 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp)); 8410 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure)); 8411 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1)); 8412 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1)); 8413 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8414 PetscCall(KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type)); 8415 PetscCall(KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE)); 8416 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8417 /* TODO is this logic correct? should check for coarse_mat type */ 8418 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8419 /* prefix */ 8420 PetscCall(PetscStrcpy(prefix,"")); 8421 PetscCall(PetscStrcpy(str_level,"")); 8422 if (!pcbddc->current_level) { 8423 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix))); 8424 PetscCall(PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix))); 8425 } else { 8426 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 8427 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8428 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8429 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8430 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1)); 8431 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 8432 PetscCall(PetscStrlcat(prefix,str_level,sizeof(prefix))); 8433 } 8434 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix)); 8435 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8436 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8437 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8438 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8439 /* allow user customization */ 8440 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8441 /* get some info after set from options */ 8442 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8443 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8444 force = PETSC_FALSE; 8445 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8446 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8447 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8448 if (multilevel_allowed && !force && !valid) { 8449 isbddc = PETSC_TRUE; 8450 PetscCall(PCSetType(pc_temp,PCBDDC)); 8451 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8452 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8453 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8454 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8455 PetscObjectOptionsBegin((PetscObject)pc_temp); 8456 PetscCall((*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp)); 8457 PetscCall(PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp)); 8458 PetscOptionsEnd(); 8459 pc_temp->setfromoptionscalled++; 8460 } 8461 } 8462 } 8463 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8464 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8465 if (nisdofs) { 8466 PetscCall(PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray)); 8467 for (i=0;i<nisdofs;i++) { 8468 PetscCall(ISDestroy(&isarray[i])); 8469 } 8470 } 8471 if (nisneu) { 8472 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs])); 8473 PetscCall(ISDestroy(&isarray[nisdofs])); 8474 } 8475 if (nisvert) { 8476 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1])); 8477 PetscCall(ISDestroy(&isarray[nis-1])); 8478 } 8479 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE)); 8480 8481 /* get some info after set from options */ 8482 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8483 8484 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8485 if (isbddc && !multilevel_allowed) { 8486 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8487 } 8488 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8489 force = PETSC_FALSE; 8490 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8491 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8492 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8493 PetscCall(PCSetType(pc_temp,PCBDDC)); 8494 } 8495 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant)); 8496 if (isredundant) { 8497 KSP inner_ksp; 8498 PC inner_pc; 8499 8500 PetscCall(PCRedundantGetKSP(pc_temp,&inner_ksp)); 8501 PetscCall(KSPGetPC(inner_ksp,&inner_pc)); 8502 } 8503 8504 /* parameters which miss an API */ 8505 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8506 if (isbddc) { 8507 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8508 8509 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8510 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8511 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8512 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8513 if (pcbddc_coarse->benign_saddle_point) { 8514 Mat coarsedivudotp_is; 8515 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8516 IS row,col; 8517 const PetscInt *gidxs; 8518 PetscInt n,st,M,N; 8519 8520 PetscCall(MatGetSize(coarsedivudotp,&n,NULL)); 8521 PetscCallMPI(MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat))); 8522 st = st-n; 8523 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row)); 8524 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL)); 8525 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap,&n)); 8526 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 8527 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col)); 8528 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 8529 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 8530 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 8531 PetscCall(ISGetSize(row,&M)); 8532 PetscCall(MatGetSize(coarse_mat,&N,NULL)); 8533 PetscCall(ISDestroy(&row)); 8534 PetscCall(ISDestroy(&col)); 8535 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is)); 8536 PetscCall(MatSetType(coarsedivudotp_is,MATIS)); 8537 PetscCall(MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N)); 8538 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g)); 8539 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8540 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8541 PetscCall(MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp)); 8542 PetscCall(MatDestroy(&coarsedivudotp)); 8543 PetscCall(PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL)); 8544 PetscCall(MatDestroy(&coarsedivudotp_is)); 8545 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8546 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8547 } 8548 } 8549 8550 /* propagate symmetry info of coarse matrix */ 8551 PetscCall(MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE)); 8552 if (pc->pmat->symmetric_set) PetscCall(MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric)); 8553 if (pc->pmat->hermitian_set) PetscCall(MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian)); 8554 if (pc->pmat->spd_set) PetscCall(MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd)); 8555 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8556 PetscCall(MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE)); 8557 } 8558 /* set operators */ 8559 PetscCall(MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view")); 8560 PetscCall(MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix)); 8561 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8562 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level)); 8563 } 8564 PetscCall(MatDestroy(&coarseG)); 8565 PetscCall(PetscFree(isarray)); 8566 #if 0 8567 { 8568 PetscViewer viewer; 8569 char filename[256]; 8570 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8571 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8572 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8573 PetscCall(MatView(coarse_mat,viewer)); 8574 PetscCall(PetscViewerPopFormat(viewer)); 8575 PetscCall(PetscViewerDestroy(&viewer)); 8576 } 8577 #endif 8578 8579 if (corners) { 8580 Vec gv; 8581 IS is; 8582 const PetscInt *idxs; 8583 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8584 PetscScalar *coords; 8585 8586 PetscCheck(pcbddc->mat_graph->cloc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8587 PetscCall(VecGetSize(pcbddc->coarse_vec,&N)); 8588 PetscCall(VecGetLocalSize(pcbddc->coarse_vec,&n)); 8589 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv)); 8590 PetscCall(VecSetBlockSize(gv,cdim)); 8591 PetscCall(VecSetSizes(gv,n*cdim,N*cdim)); 8592 PetscCall(VecSetType(gv,VECSTANDARD)); 8593 PetscCall(VecSetFromOptions(gv)); 8594 PetscCall(VecSet(gv,PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8595 8596 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8597 PetscCall(ISGetLocalSize(is,&n)); 8598 PetscCall(ISGetIndices(is,&idxs)); 8599 PetscCall(PetscMalloc1(n*cdim,&coords)); 8600 for (i=0;i<n;i++) { 8601 for (d=0;d<cdim;d++) { 8602 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8603 } 8604 } 8605 PetscCall(ISRestoreIndices(is,&idxs)); 8606 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8607 8608 PetscCall(ISGetLocalSize(corners,&n)); 8609 PetscCall(ISGetIndices(corners,&idxs)); 8610 PetscCall(VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES)); 8611 PetscCall(ISRestoreIndices(corners,&idxs)); 8612 PetscCall(PetscFree(coords)); 8613 PetscCall(VecAssemblyBegin(gv)); 8614 PetscCall(VecAssemblyEnd(gv)); 8615 PetscCall(VecGetArray(gv,&coords)); 8616 if (pcbddc->coarse_ksp) { 8617 PC coarse_pc; 8618 PetscBool isbddc; 8619 8620 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 8621 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 8622 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8623 PetscReal *realcoords; 8624 8625 PetscCall(VecGetLocalSize(gv,&n)); 8626 #if defined(PETSC_USE_COMPLEX) 8627 PetscCall(PetscMalloc1(n,&realcoords)); 8628 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8629 #else 8630 realcoords = coords; 8631 #endif 8632 PetscCall(PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords)); 8633 #if defined(PETSC_USE_COMPLEX) 8634 PetscCall(PetscFree(realcoords)); 8635 #endif 8636 } 8637 } 8638 PetscCall(VecRestoreArray(gv,&coords)); 8639 PetscCall(VecDestroy(&gv)); 8640 } 8641 PetscCall(ISDestroy(&corners)); 8642 8643 if (pcbddc->coarse_ksp) { 8644 Vec crhs,csol; 8645 8646 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&csol)); 8647 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&crhs)); 8648 if (!csol) { 8649 PetscCall(MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL)); 8650 } 8651 if (!crhs) { 8652 PetscCall(MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs))); 8653 } 8654 } 8655 PetscCall(MatDestroy(&coarsedivudotp)); 8656 8657 /* compute null space for coarse solver if the benign trick has been requested */ 8658 if (pcbddc->benign_null) { 8659 8660 PetscCall(VecSet(pcbddc->vec1_P,0.)); 8661 for (i=0;i<pcbddc->benign_n;i++) { 8662 PetscCall(VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES)); 8663 } 8664 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8665 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8666 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8667 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8668 if (coarse_mat) { 8669 Vec nullv; 8670 PetscScalar *array,*array2; 8671 PetscInt nl; 8672 8673 PetscCall(MatCreateVecs(coarse_mat,&nullv,NULL)); 8674 PetscCall(VecGetLocalSize(nullv,&nl)); 8675 PetscCall(VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8676 PetscCall(VecGetArray(nullv,&array2)); 8677 PetscCall(PetscArraycpy(array2,array,nl)); 8678 PetscCall(VecRestoreArray(nullv,&array2)); 8679 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8680 PetscCall(VecNormalize(nullv,NULL)); 8681 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace)); 8682 PetscCall(VecDestroy(&nullv)); 8683 } 8684 } 8685 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8686 8687 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8688 if (pcbddc->coarse_ksp) { 8689 PetscBool ispreonly; 8690 8691 if (CoarseNullSpace) { 8692 PetscBool isnull; 8693 8694 PetscCall(MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull)); 8695 if (isnull) PetscCall(MatSetNullSpace(coarse_mat,CoarseNullSpace)); 8696 /* TODO: add local nullspaces (if any) */ 8697 } 8698 /* setup coarse ksp */ 8699 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8700 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8701 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly)); 8702 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8703 KSP check_ksp; 8704 KSPType check_ksp_type; 8705 PC check_pc; 8706 Vec check_vec,coarse_vec; 8707 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8708 PetscInt its; 8709 PetscBool compute_eigs; 8710 PetscReal *eigs_r,*eigs_c; 8711 PetscInt neigs; 8712 const char *prefix; 8713 8714 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8715 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp)); 8716 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0)); 8717 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE)); 8718 PetscCall(KSPSetOperators(check_ksp,coarse_mat,coarse_mat)); 8719 PetscCall(KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size)); 8720 /* prevent from setup unneeded object */ 8721 PetscCall(KSPGetPC(check_ksp,&check_pc)); 8722 PetscCall(PCSetType(check_pc,PCNONE)); 8723 if (ispreonly) { 8724 check_ksp_type = KSPPREONLY; 8725 compute_eigs = PETSC_FALSE; 8726 } else { 8727 check_ksp_type = KSPGMRES; 8728 compute_eigs = PETSC_TRUE; 8729 } 8730 PetscCall(KSPSetType(check_ksp,check_ksp_type)); 8731 PetscCall(KSPSetComputeSingularValues(check_ksp,compute_eigs)); 8732 PetscCall(KSPSetComputeEigenvalues(check_ksp,compute_eigs)); 8733 PetscCall(KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1)); 8734 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix)); 8735 PetscCall(KSPSetOptionsPrefix(check_ksp,prefix)); 8736 PetscCall(KSPAppendOptionsPrefix(check_ksp,"check_")); 8737 PetscCall(KSPSetFromOptions(check_ksp)); 8738 PetscCall(KSPSetUp(check_ksp)); 8739 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&check_pc)); 8740 PetscCall(KSPSetPC(check_ksp,check_pc)); 8741 /* create random vec */ 8742 PetscCall(MatCreateVecs(coarse_mat,&coarse_vec,&check_vec)); 8743 PetscCall(VecSetRandom(check_vec,NULL)); 8744 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8745 /* solve coarse problem */ 8746 PetscCall(KSPSolve(check_ksp,coarse_vec,coarse_vec)); 8747 PetscCall(KSPCheckSolve(check_ksp,pc,coarse_vec)); 8748 /* set eigenvalue estimation if preonly has not been requested */ 8749 if (compute_eigs) { 8750 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_r)); 8751 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_c)); 8752 PetscCall(KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs)); 8753 if (neigs) { 8754 lambda_max = eigs_r[neigs-1]; 8755 lambda_min = eigs_r[0]; 8756 if (pcbddc->use_coarse_estimates) { 8757 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8758 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min)); 8759 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min))); 8760 } 8761 } 8762 } 8763 } 8764 8765 /* check coarse problem residual error */ 8766 if (pcbddc->dbg_flag) { 8767 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8768 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1))); 8769 PetscCall(VecAXPY(check_vec,-1.0,coarse_vec)); 8770 PetscCall(VecNorm(check_vec,NORM_INFINITY,&infty_error)); 8771 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8772 PetscCall(VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error)); 8773 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates)); 8774 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer)); 8775 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer)); 8776 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",(double)infty_error)); 8777 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",(double)abs_infty_error)); 8778 if (CoarseNullSpace) { 8779 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n")); 8780 } 8781 if (compute_eigs) { 8782 PetscReal lambda_max_s,lambda_min_s; 8783 KSPConvergedReason reason; 8784 PetscCall(KSPGetType(check_ksp,&check_ksp_type)); 8785 PetscCall(KSPGetIterationNumber(check_ksp,&its)); 8786 PetscCall(KSPGetConvergedReason(check_ksp,&reason)); 8787 PetscCall(KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s)); 8788 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,(double)lambda_min,(double)lambda_max,(double)lambda_min_s,(double)lambda_max_s)); 8789 for (i=0;i<neigs;i++) { 8790 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",(double)eigs_r[i],(double)eigs_c[i])); 8791 } 8792 } 8793 PetscCall(PetscViewerFlush(dbg_viewer)); 8794 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1))); 8795 } 8796 PetscCall(VecDestroy(&check_vec)); 8797 PetscCall(VecDestroy(&coarse_vec)); 8798 PetscCall(KSPDestroy(&check_ksp)); 8799 if (compute_eigs) { 8800 PetscCall(PetscFree(eigs_r)); 8801 PetscCall(PetscFree(eigs_c)); 8802 } 8803 } 8804 } 8805 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 8806 /* print additional info */ 8807 if (pcbddc->dbg_flag) { 8808 /* waits until all processes reaches this point */ 8809 PetscCall(PetscBarrier((PetscObject)pc)); 8810 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %" PetscInt_FMT "\n",pcbddc->current_level)); 8811 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8812 } 8813 8814 /* free memory */ 8815 PetscCall(MatDestroy(&coarse_mat)); 8816 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8817 PetscFunctionReturn(0); 8818 } 8819 8820 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8821 { 8822 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8823 PC_IS* pcis = (PC_IS*)pc->data; 8824 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8825 IS subset,subset_mult,subset_n; 8826 PetscInt local_size,coarse_size=0; 8827 PetscInt *local_primal_indices=NULL; 8828 const PetscInt *t_local_primal_indices; 8829 8830 PetscFunctionBegin; 8831 /* Compute global number of coarse dofs */ 8832 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8833 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n)); 8834 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset)); 8835 PetscCall(ISDestroy(&subset_n)); 8836 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult)); 8837 PetscCall(ISRenumber(subset,subset_mult,&coarse_size,&subset_n)); 8838 PetscCall(ISDestroy(&subset)); 8839 PetscCall(ISDestroy(&subset_mult)); 8840 PetscCall(ISGetLocalSize(subset_n,&local_size)); 8841 PetscCheck(local_size == pcbddc->local_primal_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT,local_size,pcbddc->local_primal_size); 8842 PetscCall(PetscMalloc1(local_size,&local_primal_indices)); 8843 PetscCall(ISGetIndices(subset_n,&t_local_primal_indices)); 8844 PetscCall(PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size)); 8845 PetscCall(ISRestoreIndices(subset_n,&t_local_primal_indices)); 8846 PetscCall(ISDestroy(&subset_n)); 8847 8848 /* check numbering */ 8849 if (pcbddc->dbg_flag) { 8850 PetscScalar coarsesum,*array,*array2; 8851 PetscInt i; 8852 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8853 8854 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8855 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8856 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n")); 8857 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8858 /* counter */ 8859 PetscCall(VecSet(pcis->vec1_global,0.0)); 8860 PetscCall(VecSet(pcis->vec1_N,1.0)); 8861 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8862 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8863 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8864 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8865 PetscCall(VecSet(pcis->vec1_N,0.0)); 8866 for (i=0;i<pcbddc->local_primal_size;i++) { 8867 PetscCall(VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES)); 8868 } 8869 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8870 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8871 PetscCall(VecSet(pcis->vec1_global,0.0)); 8872 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8873 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8874 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8875 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8876 PetscCall(VecGetArray(pcis->vec1_N,&array)); 8877 PetscCall(VecGetArray(pcis->vec2_N,&array2)); 8878 for (i=0;i<pcis->n;i++) { 8879 if (array[i] != 0.0 && array[i] != array2[i]) { 8880 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8881 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8882 set_error = PETSC_TRUE; 8883 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi)); 8884 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n",PetscGlobalRank,i,gi,owned,neigh)); 8885 } 8886 } 8887 PetscCall(VecRestoreArray(pcis->vec2_N,&array2)); 8888 PetscCall(MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8889 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8890 for (i=0;i<pcis->n;i++) { 8891 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8892 } 8893 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 8894 PetscCall(VecSet(pcis->vec1_global,0.0)); 8895 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8896 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8897 PetscCall(VecSum(pcis->vec1_global,&coarsesum)); 8898 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %" PetscInt_FMT " (%lf)\n",coarse_size,(double)PetscRealPart(coarsesum))); 8899 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8900 PetscInt *gidxs; 8901 8902 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&gidxs)); 8903 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs)); 8904 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n")); 8905 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8906 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank)); 8907 for (i=0;i<pcbddc->local_primal_size;i++) { 8908 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i])); 8909 } 8910 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8911 PetscCall(PetscFree(gidxs)); 8912 } 8913 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8914 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8915 PetscCheck(!set_error_reduced,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8916 } 8917 8918 /* get back data */ 8919 *coarse_size_n = coarse_size; 8920 *local_primal_indices_n = local_primal_indices; 8921 PetscFunctionReturn(0); 8922 } 8923 8924 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8925 { 8926 IS localis_t; 8927 PetscInt i,lsize,*idxs,n; 8928 PetscScalar *vals; 8929 8930 PetscFunctionBegin; 8931 /* get indices in local ordering exploiting local to global map */ 8932 PetscCall(ISGetLocalSize(globalis,&lsize)); 8933 PetscCall(PetscMalloc1(lsize,&vals)); 8934 for (i=0;i<lsize;i++) vals[i] = 1.0; 8935 PetscCall(ISGetIndices(globalis,(const PetscInt**)&idxs)); 8936 PetscCall(VecSet(gwork,0.0)); 8937 PetscCall(VecSet(lwork,0.0)); 8938 if (idxs) { /* multilevel guard */ 8939 PetscCall(VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE)); 8940 PetscCall(VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES)); 8941 } 8942 PetscCall(VecAssemblyBegin(gwork)); 8943 PetscCall(ISRestoreIndices(globalis,(const PetscInt**)&idxs)); 8944 PetscCall(PetscFree(vals)); 8945 PetscCall(VecAssemblyEnd(gwork)); 8946 /* now compute set in local ordering */ 8947 PetscCall(VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 8948 PetscCall(VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 8949 PetscCall(VecGetArrayRead(lwork,(const PetscScalar**)&vals)); 8950 PetscCall(VecGetSize(lwork,&n)); 8951 for (i=0,lsize=0;i<n;i++) { 8952 if (PetscRealPart(vals[i]) > 0.5) { 8953 lsize++; 8954 } 8955 } 8956 PetscCall(PetscMalloc1(lsize,&idxs)); 8957 for (i=0,lsize=0;i<n;i++) { 8958 if (PetscRealPart(vals[i]) > 0.5) { 8959 idxs[lsize++] = i; 8960 } 8961 } 8962 PetscCall(VecRestoreArrayRead(lwork,(const PetscScalar**)&vals)); 8963 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t)); 8964 *localis = localis_t; 8965 PetscFunctionReturn(0); 8966 } 8967 8968 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 8969 { 8970 PC_IS *pcis = (PC_IS*)pc->data; 8971 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8972 PC_IS *pcisf; 8973 PC_BDDC *pcbddcf; 8974 PC pcf; 8975 8976 PetscFunctionBegin; 8977 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc),&pcf)); 8978 PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)pcf)); 8979 PetscCall(PCSetOperators(pcf,pc->mat,pc->pmat)); 8980 PetscCall(PCSetType(pcf,PCBDDC)); 8981 8982 pcisf = (PC_IS*)pcf->data; 8983 pcbddcf = (PC_BDDC*)pcf->data; 8984 8985 pcisf->is_B_local = pcis->is_B_local; 8986 pcisf->vec1_N = pcis->vec1_N; 8987 pcisf->BtoNmap = pcis->BtoNmap; 8988 pcisf->n = pcis->n; 8989 pcisf->n_B = pcis->n_B; 8990 8991 PetscCall(PetscFree(pcbddcf->mat_graph)); 8992 PetscCall(PetscFree(pcbddcf->sub_schurs)); 8993 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 8994 pcbddcf->sub_schurs = schurs; 8995 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 8996 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 8997 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 8998 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 8999 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 9000 pcbddcf->use_faces = PETSC_TRUE; 9001 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 9002 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 9003 pcbddcf->use_qr_single = (PetscBool)!constraints; 9004 pcbddcf->fake_change = PETSC_TRUE; 9005 pcbddcf->dbg_flag = pcbddc->dbg_flag; 9006 9007 PetscCall(PCBDDCAdaptiveSelection(pcf)); 9008 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9009 9010 *change = pcbddcf->ConstraintMatrix; 9011 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat),pcbddcf->local_primal_size_cc,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,change_primal)); 9012 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat),pcbddcf->local_primal_size_cc,pcbddcf->local_primal_ref_mult,PETSC_COPY_VALUES,change_primal_mult)); 9013 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 9014 9015 if (schurs) pcbddcf->sub_schurs = NULL; 9016 pcbddcf->ConstraintMatrix = NULL; 9017 pcbddcf->mat_graph = NULL; 9018 pcisf->is_B_local = NULL; 9019 pcisf->vec1_N = NULL; 9020 pcisf->BtoNmap = NULL; 9021 PetscCall(PCDestroy(&pcf)); 9022 PetscFunctionReturn(0); 9023 } 9024 9025 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9026 { 9027 PC_IS *pcis=(PC_IS*)pc->data; 9028 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9029 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9030 Mat S_j; 9031 PetscInt *used_xadj,*used_adjncy; 9032 PetscBool free_used_adj; 9033 9034 PetscFunctionBegin; 9035 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9036 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9037 free_used_adj = PETSC_FALSE; 9038 if (pcbddc->sub_schurs_layers == -1) { 9039 used_xadj = NULL; 9040 used_adjncy = NULL; 9041 } else { 9042 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9043 used_xadj = pcbddc->mat_graph->xadj; 9044 used_adjncy = pcbddc->mat_graph->adjncy; 9045 } else if (pcbddc->computed_rowadj) { 9046 used_xadj = pcbddc->mat_graph->xadj; 9047 used_adjncy = pcbddc->mat_graph->adjncy; 9048 } else { 9049 PetscBool flg_row=PETSC_FALSE; 9050 const PetscInt *xadj,*adjncy; 9051 PetscInt nvtxs; 9052 9053 PetscCall(MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9054 if (flg_row) { 9055 PetscCall(PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy)); 9056 PetscCall(PetscArraycpy(used_xadj,xadj,nvtxs+1)); 9057 PetscCall(PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs])); 9058 free_used_adj = PETSC_TRUE; 9059 } else { 9060 pcbddc->sub_schurs_layers = -1; 9061 used_xadj = NULL; 9062 used_adjncy = NULL; 9063 } 9064 PetscCall(MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9065 } 9066 } 9067 9068 /* setup sub_schurs data */ 9069 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9070 if (!sub_schurs->schur_explicit) { 9071 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9072 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9073 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL)); 9074 } else { 9075 Mat change = NULL; 9076 Vec scaling = NULL; 9077 IS change_primal = NULL, iP; 9078 PetscInt benign_n; 9079 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9080 PetscBool need_change = PETSC_FALSE; 9081 PetscBool discrete_harmonic = PETSC_FALSE; 9082 9083 if (!pcbddc->use_vertices && reuse_solvers) { 9084 PetscInt n_vertices; 9085 9086 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 9087 reuse_solvers = (PetscBool)!n_vertices; 9088 } 9089 if (!pcbddc->benign_change_explicit) { 9090 benign_n = pcbddc->benign_n; 9091 } else { 9092 benign_n = 0; 9093 } 9094 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9095 We need a global reduction to avoid possible deadlocks. 9096 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9097 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9098 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9099 PetscCall(MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 9100 need_change = (PetscBool)(!need_change); 9101 } 9102 /* If the user defines additional constraints, we import them here */ 9103 if (need_change) { 9104 PetscCheck(!pcbddc->sub_schurs_rebuild,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9105 PetscCall(PCBDDCComputeFakeChange(pc,PETSC_FALSE,NULL,NULL,&change,&change_primal,NULL,&sub_schurs->change_with_qr)); 9106 9107 } 9108 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9109 9110 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP)); 9111 if (iP) { 9112 PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC"); 9113 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL)); 9114 PetscOptionsEnd(); 9115 } 9116 if (discrete_harmonic) { 9117 Mat A; 9118 PetscCall(MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A)); 9119 PetscCall(MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL)); 9120 PetscCall(PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP)); 9121 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9122 PetscCall(MatDestroy(&A)); 9123 } else { 9124 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9125 } 9126 PetscCall(MatDestroy(&change)); 9127 PetscCall(ISDestroy(&change_primal)); 9128 } 9129 PetscCall(MatDestroy(&S_j)); 9130 9131 /* free adjacency */ 9132 if (free_used_adj) PetscCall(PetscFree2(used_xadj,used_adjncy)); 9133 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9134 PetscFunctionReturn(0); 9135 } 9136 9137 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9138 { 9139 PC_IS *pcis=(PC_IS*)pc->data; 9140 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9141 PCBDDCGraph graph; 9142 9143 PetscFunctionBegin; 9144 /* attach interface graph for determining subsets */ 9145 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9146 IS verticesIS,verticescomm; 9147 PetscInt vsize,*idxs; 9148 9149 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9150 PetscCall(ISGetSize(verticesIS,&vsize)); 9151 PetscCall(ISGetIndices(verticesIS,(const PetscInt**)&idxs)); 9152 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm)); 9153 PetscCall(ISRestoreIndices(verticesIS,(const PetscInt**)&idxs)); 9154 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9155 PetscCall(PCBDDCGraphCreate(&graph)); 9156 PetscCall(PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount)); 9157 PetscCall(PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm)); 9158 PetscCall(ISDestroy(&verticescomm)); 9159 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9160 } else { 9161 graph = pcbddc->mat_graph; 9162 } 9163 /* print some info */ 9164 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9165 IS vertices; 9166 PetscInt nv,nedges,nfaces; 9167 PetscCall(PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 9168 PetscCall(PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9169 PetscCall(ISGetSize(vertices,&nv)); 9170 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9171 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 9172 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices)); 9173 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges)); 9174 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces)); 9175 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9176 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9177 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9178 } 9179 9180 /* sub_schurs init */ 9181 if (!pcbddc->sub_schurs) { 9182 PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9183 } 9184 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild,PETSC_FALSE)); 9185 9186 /* free graph struct */ 9187 if (pcbddc->sub_schurs_rebuild) { 9188 PetscCall(PCBDDCGraphDestroy(&graph)); 9189 } 9190 PetscFunctionReturn(0); 9191 } 9192 9193 PetscErrorCode PCBDDCCheckOperator(PC pc) 9194 { 9195 PC_IS *pcis=(PC_IS*)pc->data; 9196 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9197 9198 PetscFunctionBegin; 9199 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9200 IS zerodiag = NULL; 9201 Mat S_j,B0_B=NULL; 9202 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9203 PetscScalar *p0_check,*array,*array2; 9204 PetscReal norm; 9205 PetscInt i; 9206 9207 /* B0 and B0_B */ 9208 if (zerodiag) { 9209 IS dummy; 9210 9211 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy)); 9212 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 9213 PetscCall(MatCreateVecs(B0_B,NULL,&dummy_vec)); 9214 PetscCall(ISDestroy(&dummy)); 9215 } 9216 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9217 PetscCall(VecDuplicate(pcbddc->vec1_P,&vec_scale_P)); 9218 PetscCall(VecSet(pcbddc->vec1_P,1.0)); 9219 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9220 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9221 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9222 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9223 PetscCall(VecReciprocal(vec_scale_P)); 9224 /* S_j */ 9225 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9226 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9227 9228 /* mimic vector in \widetilde{W}_\Gamma */ 9229 PetscCall(VecSetRandom(pcis->vec1_N,NULL)); 9230 /* continuous in primal space */ 9231 PetscCall(VecSetRandom(pcbddc->coarse_vec,NULL)); 9232 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9233 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9234 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9235 PetscCall(PetscCalloc1(pcbddc->benign_n,&p0_check)); 9236 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9237 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9238 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9239 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9240 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9241 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9242 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9243 PetscCall(VecDuplicate(pcis->vec2_B,&vec_check_B)); 9244 PetscCall(VecCopy(pcis->vec2_B,vec_check_B)); 9245 9246 /* assemble rhs for coarse problem */ 9247 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9248 /* local with Schur */ 9249 PetscCall(MatMult(S_j,pcis->vec2_B,pcis->vec1_B)); 9250 if (zerodiag) { 9251 PetscCall(VecGetArray(dummy_vec,&array)); 9252 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9253 PetscCall(VecRestoreArray(dummy_vec,&array)); 9254 PetscCall(MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B)); 9255 } 9256 /* sum on primal nodes the local contributions */ 9257 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9258 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9259 PetscCall(VecGetArray(pcis->vec1_N,&array)); 9260 PetscCall(VecGetArray(pcbddc->vec1_P,&array2)); 9261 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9262 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array2)); 9263 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 9264 PetscCall(VecSet(pcbddc->coarse_vec,0.)); 9265 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9266 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9267 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9268 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9269 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9270 /* scale primal nodes (BDDC sums contibutions) */ 9271 PetscCall(VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P)); 9272 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9273 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9274 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9275 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9276 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9277 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9278 /* global: \widetilde{B0}_B w_\Gamma */ 9279 if (zerodiag) { 9280 PetscCall(MatMult(B0_B,pcis->vec2_B,dummy_vec)); 9281 PetscCall(VecGetArray(dummy_vec,&array)); 9282 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9283 PetscCall(VecRestoreArray(dummy_vec,&array)); 9284 } 9285 /* BDDC */ 9286 PetscCall(VecSet(pcis->vec1_D,0.)); 9287 PetscCall(PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE)); 9288 9289 PetscCall(VecCopy(pcis->vec1_B,pcis->vec2_B)); 9290 PetscCall(VecAXPY(pcis->vec1_B,-1.0,vec_check_B)); 9291 PetscCall(VecNorm(pcis->vec1_B,NORM_INFINITY,&norm)); 9292 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,(double)norm)); 9293 for (i=0;i<pcbddc->benign_n;i++) { 9294 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%" PetscInt_FMT "] error is %1.4e\n",PetscGlobalRank,i,(double)PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]))); 9295 } 9296 PetscCall(PetscFree(p0_check)); 9297 PetscCall(VecDestroy(&vec_scale_P)); 9298 PetscCall(VecDestroy(&vec_check_B)); 9299 PetscCall(VecDestroy(&dummy_vec)); 9300 PetscCall(MatDestroy(&S_j)); 9301 PetscCall(MatDestroy(&B0_B)); 9302 } 9303 PetscFunctionReturn(0); 9304 } 9305 9306 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9307 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9308 { 9309 Mat At; 9310 IS rows; 9311 PetscInt rst,ren; 9312 PetscLayout rmap; 9313 9314 PetscFunctionBegin; 9315 rst = ren = 0; 9316 if (ccomm != MPI_COMM_NULL) { 9317 PetscCall(PetscLayoutCreate(ccomm,&rmap)); 9318 PetscCall(PetscLayoutSetSize(rmap,A->rmap->N)); 9319 PetscCall(PetscLayoutSetBlockSize(rmap,1)); 9320 PetscCall(PetscLayoutSetUp(rmap)); 9321 PetscCall(PetscLayoutGetRange(rmap,&rst,&ren)); 9322 } 9323 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows)); 9324 PetscCall(MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At)); 9325 PetscCall(ISDestroy(&rows)); 9326 9327 if (ccomm != MPI_COMM_NULL) { 9328 Mat_MPIAIJ *a,*b; 9329 IS from,to; 9330 Vec gvec; 9331 PetscInt lsize; 9332 9333 PetscCall(MatCreate(ccomm,B)); 9334 PetscCall(MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N)); 9335 PetscCall(MatSetType(*B,MATAIJ)); 9336 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 9337 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9338 a = (Mat_MPIAIJ*)At->data; 9339 b = (Mat_MPIAIJ*)(*B)->data; 9340 PetscCallMPI(MPI_Comm_size(ccomm,&b->size)); 9341 PetscCallMPI(MPI_Comm_rank(ccomm,&b->rank)); 9342 PetscCall(PetscObjectReference((PetscObject)a->A)); 9343 PetscCall(PetscObjectReference((PetscObject)a->B)); 9344 b->A = a->A; 9345 b->B = a->B; 9346 9347 b->donotstash = a->donotstash; 9348 b->roworiented = a->roworiented; 9349 b->rowindices = NULL; 9350 b->rowvalues = NULL; 9351 b->getrowactive = PETSC_FALSE; 9352 9353 (*B)->rmap = rmap; 9354 (*B)->factortype = A->factortype; 9355 (*B)->assembled = PETSC_TRUE; 9356 (*B)->insertmode = NOT_SET_VALUES; 9357 (*B)->preallocated = PETSC_TRUE; 9358 9359 if (a->colmap) { 9360 #if defined(PETSC_USE_CTABLE) 9361 PetscCall(PetscTableCreateCopy(a->colmap,&b->colmap)); 9362 #else 9363 PetscCall(PetscMalloc1(At->cmap->N,&b->colmap)); 9364 PetscCall(PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt))); 9365 PetscCall(PetscArraycpy(b->colmap,a->colmap,At->cmap->N)); 9366 #endif 9367 } else b->colmap = NULL; 9368 if (a->garray) { 9369 PetscInt len; 9370 len = a->B->cmap->n; 9371 PetscCall(PetscMalloc1(len+1,&b->garray)); 9372 PetscCall(PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt))); 9373 if (len) PetscCall(PetscArraycpy(b->garray,a->garray,len)); 9374 } else b->garray = NULL; 9375 9376 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9377 b->lvec = a->lvec; 9378 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec)); 9379 9380 /* cannot use VecScatterCopy */ 9381 PetscCall(VecGetLocalSize(b->lvec,&lsize)); 9382 PetscCall(ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from)); 9383 PetscCall(ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to)); 9384 PetscCall(MatCreateVecs(*B,&gvec,NULL)); 9385 PetscCall(VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx)); 9386 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx)); 9387 PetscCall(ISDestroy(&from)); 9388 PetscCall(ISDestroy(&to)); 9389 PetscCall(VecDestroy(&gvec)); 9390 } 9391 PetscCall(MatDestroy(&At)); 9392 PetscFunctionReturn(0); 9393 } 9394