1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 #if defined(PETSC_USE_COMPLEX) 22 PetscReal *rwork2; 23 #endif 24 25 PetscFunctionBegin; 26 PetscCall(MatGetSize(A,&nr,&nc)); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 PetscCall(PetscMalloc1(ulw,&uwork)); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 PetscCall(PetscMalloc1(n,&sing)); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 PetscCall(PetscMalloc1(nr*nr,&U)); 46 PetscCall(PetscBLASIntCast(nr,&bM)); 47 PetscCall(PetscBLASIntCast(nc,&bN)); 48 PetscCall(PetscBLASIntCast(ulw,&lwork)); 49 PetscCall(MatDenseGetArray(A,&data)); 50 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 #if !defined(PETSC_USE_COMPLEX) 52 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 53 #else 54 PetscCall(PetscMalloc1(5*n,&rwork2)); 55 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr)); 56 PetscCall(PetscFree(rwork2)); 57 #endif 58 PetscCall(PetscFPTrapPop()); 59 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 60 PetscCall(MatDenseRestoreArray(A,&data)); 61 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 62 if (!rwork) { 63 PetscCall(PetscFree(sing)); 64 } 65 if (!work) { 66 PetscCall(PetscFree(uwork)); 67 } 68 /* create B */ 69 if (!range) { 70 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B)); 71 PetscCall(MatDenseGetArray(*B,&data)); 72 PetscCall(PetscArraycpy(data,U+nr*i,(nr-i)*nr)); 73 } else { 74 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B)); 75 PetscCall(MatDenseGetArray(*B,&data)); 76 PetscCall(PetscArraycpy(data,U,i*nr)); 77 } 78 PetscCall(MatDenseRestoreArray(*B,&data)); 79 PetscCall(PetscFree(U)); 80 PetscFunctionReturn(0); 81 } 82 83 /* TODO REMOVE */ 84 #if defined(PRINT_GDET) 85 static int inc = 0; 86 static int lev = 0; 87 #endif 88 89 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 90 { 91 Mat GE,GEd; 92 PetscInt rsize,csize,esize; 93 PetscScalar *ptr; 94 95 PetscFunctionBegin; 96 PetscCall(ISGetSize(edge,&esize)); 97 if (!esize) PetscFunctionReturn(0); 98 PetscCall(ISGetSize(extrow,&rsize)); 99 PetscCall(ISGetSize(extcol,&csize)); 100 101 /* gradients */ 102 ptr = work + 5*esize; 103 PetscCall(MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE)); 104 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins)); 105 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins)); 106 PetscCall(MatDestroy(&GE)); 107 108 /* constants */ 109 ptr += rsize*csize; 110 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd)); 111 PetscCall(MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE)); 112 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd)); 113 PetscCall(MatDestroy(&GE)); 114 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins)); 115 PetscCall(MatDestroy(&GEd)); 116 117 if (corners) { 118 Mat GEc; 119 const PetscScalar *vals; 120 PetscScalar v; 121 122 PetscCall(MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc)); 123 PetscCall(MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd)); 124 PetscCall(MatDenseGetArrayRead(GEd,&vals)); 125 /* v = PetscAbsScalar(vals[0]) */; 126 v = 1.; 127 cvals[0] = vals[0]/v; 128 cvals[1] = vals[1]/v; 129 PetscCall(MatDenseRestoreArrayRead(GEd,&vals)); 130 PetscCall(MatScale(*GKins,1./v)); 131 #if defined(PRINT_GDET) 132 { 133 PetscViewer viewer; 134 char filename[256]; 135 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 136 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 137 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 138 PetscCall(PetscObjectSetName((PetscObject)GEc,"GEc")); 139 PetscCall(MatView(GEc,viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)(*GKins),"GK")); 141 PetscCall(MatView(*GKins,viewer)); 142 PetscCall(PetscObjectSetName((PetscObject)GEd,"Gproj")); 143 PetscCall(MatView(GEd,viewer)); 144 PetscCall(PetscViewerDestroy(&viewer)); 145 } 146 #endif 147 PetscCall(MatDestroy(&GEd)); 148 PetscCall(MatDestroy(&GEc)); 149 } 150 151 PetscFunctionReturn(0); 152 } 153 154 PetscErrorCode PCBDDCNedelecSupport(PC pc) 155 { 156 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 157 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 158 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 159 Vec tvec; 160 PetscSF sfv; 161 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 162 MPI_Comm comm; 163 IS lned,primals,allprimals,nedfieldlocal; 164 IS *eedges,*extrows,*extcols,*alleedges; 165 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 166 PetscScalar *vals,*work; 167 PetscReal *rwork; 168 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 169 PetscInt ne,nv,Lv,order,n,field; 170 PetscInt n_neigh,*neigh,*n_shared,**shared; 171 PetscInt i,j,extmem,cum,maxsize,nee; 172 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 173 PetscInt *sfvleaves,*sfvroots; 174 PetscInt *corners,*cedges; 175 PetscInt *ecount,**eneighs,*vcount,**vneighs; 176 PetscInt *emarks; 177 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 /* If the discrete gradient is defined for a subset of dofs and global is true, 182 it assumes G is given in global ordering for all the dofs. 183 Otherwise, the ordering is global for the Nedelec field */ 184 order = pcbddc->nedorder; 185 conforming = pcbddc->conforming; 186 field = pcbddc->nedfield; 187 global = pcbddc->nedglobal; 188 setprimal = PETSC_FALSE; 189 print = PETSC_FALSE; 190 singular = PETSC_FALSE; 191 192 /* Command line customization */ 193 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");PetscCall(ierr); 194 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL)); 195 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL)); 196 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL)); 197 /* print debug info TODO: to be removed */ 198 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL)); 199 ierr = PetscOptionsEnd();PetscCall(ierr); 200 201 /* Return if there are no edges in the decomposition and the problem is not singular */ 202 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&al2g,NULL)); 203 PetscCall(ISLocalToGlobalMappingGetSize(al2g,&n)); 204 PetscCall(PetscObjectGetComm((PetscObject)pc,&comm)); 205 if (!singular) { 206 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 207 lrc[0] = PETSC_FALSE; 208 for (i=0;i<n;i++) { 209 if (PetscRealPart(vals[i]) > 2.) { 210 lrc[0] = PETSC_TRUE; 211 break; 212 } 213 } 214 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 215 PetscCall(MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm)); 216 if (!lrc[1]) PetscFunctionReturn(0); 217 } 218 219 /* Get Nedelec field */ 220 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal,comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 221 if (pcbddc->n_ISForDofsLocal && field >= 0) { 222 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 223 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 224 PetscCall(ISGetLocalSize(nedfieldlocal,&ne)); 225 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 226 ne = n; 227 nedfieldlocal = NULL; 228 global = PETSC_TRUE; 229 } else if (field == PETSC_DECIDE) { 230 PetscInt rst,ren,*idx; 231 232 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 233 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 234 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren)); 235 for (i=rst;i<ren;i++) { 236 PetscInt nc; 237 238 PetscCall(MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 239 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 240 PetscCall(MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 241 } 242 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 243 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 244 PetscCall(PetscMalloc1(n,&idx)); 245 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 246 PetscCall(ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal)); 247 } else { 248 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 249 } 250 251 /* Sanity checks */ 252 PetscCheck(order || conforming,comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 253 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix,comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 254 PetscCheck(!order || (ne%order == 0),PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D is not a multiple of the order %D",ne,order); 255 256 /* Just set primal dofs and return */ 257 if (setprimal) { 258 IS enedfieldlocal; 259 PetscInt *eidxs; 260 261 PetscCall(PetscMalloc1(ne,&eidxs)); 262 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 263 if (nedfieldlocal) { 264 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 265 for (i=0,cum=0;i<ne;i++) { 266 if (PetscRealPart(vals[idxs[i]]) > 2.) { 267 eidxs[cum++] = idxs[i]; 268 } 269 } 270 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 271 } else { 272 for (i=0,cum=0;i<ne;i++) { 273 if (PetscRealPart(vals[i]) > 2.) { 274 eidxs[cum++] = i; 275 } 276 } 277 } 278 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 279 PetscCall(ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal)); 280 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal)); 281 PetscCall(PetscFree(eidxs)); 282 PetscCall(ISDestroy(&nedfieldlocal)); 283 PetscCall(ISDestroy(&enedfieldlocal)); 284 PetscFunctionReturn(0); 285 } 286 287 /* Compute some l2g maps */ 288 if (nedfieldlocal) { 289 IS is; 290 291 /* need to map from the local Nedelec field to local numbering */ 292 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g)); 293 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 294 PetscCall(ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is)); 295 PetscCall(ISLocalToGlobalMappingCreateIS(is,&al2g)); 296 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 297 if (global) { 298 PetscCall(PetscObjectReference((PetscObject)al2g)); 299 el2g = al2g; 300 } else { 301 IS gis; 302 303 PetscCall(ISRenumber(is,NULL,NULL,&gis)); 304 PetscCall(ISLocalToGlobalMappingCreateIS(gis,&el2g)); 305 PetscCall(ISDestroy(&gis)); 306 } 307 PetscCall(ISDestroy(&is)); 308 } else { 309 /* restore default */ 310 pcbddc->nedfield = -1; 311 /* one ref for the destruction of al2g, one for el2g */ 312 PetscCall(PetscObjectReference((PetscObject)al2g)); 313 PetscCall(PetscObjectReference((PetscObject)al2g)); 314 el2g = al2g; 315 fl2g = NULL; 316 } 317 318 /* Start communication to drop connections for interior edges (for cc analysis only) */ 319 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 320 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 321 if (nedfieldlocal) { 322 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 323 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 324 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 325 } else { 326 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 327 } 328 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 329 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 330 331 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 332 PetscCall(MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G)); 333 PetscCall(MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 334 if (global) { 335 PetscInt rst; 336 337 PetscCall(MatGetOwnershipRange(G,&rst,NULL)); 338 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 339 if (matis->sf_rootdata[i] < 2) { 340 matis->sf_rootdata[cum++] = i + rst; 341 } 342 } 343 PetscCall(MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE)); 344 PetscCall(MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL)); 345 } else { 346 PetscInt *tbz; 347 348 PetscCall(PetscMalloc1(ne,&tbz)); 349 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 350 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 351 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 352 for (i=0,cum=0;i<ne;i++) 353 if (matis->sf_leafdata[idxs[i]] == 1) 354 tbz[cum++] = i; 355 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 356 PetscCall(ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz)); 357 PetscCall(MatZeroRows(G,cum,tbz,0.,NULL,NULL)); 358 PetscCall(PetscFree(tbz)); 359 } 360 } else { /* we need the entire G to infer the nullspace */ 361 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 362 G = pcbddc->discretegradient; 363 } 364 365 /* Extract subdomain relevant rows of G */ 366 PetscCall(ISLocalToGlobalMappingGetIndices(el2g,&idxs)); 367 PetscCall(ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned)); 368 PetscCall(MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall)); 369 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g,&idxs)); 370 PetscCall(ISDestroy(&lned)); 371 PetscCall(MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis)); 372 PetscCall(MatDestroy(&lGall)); 373 PetscCall(MatISGetLocalMat(lGis,&lG)); 374 375 /* SF for nodal dofs communications */ 376 PetscCall(MatGetLocalSize(G,NULL,&Lv)); 377 PetscCall(MatISGetLocalToGlobalMapping(lGis,NULL,&vl2g)); 378 PetscCall(PetscObjectReference((PetscObject)vl2g)); 379 PetscCall(ISLocalToGlobalMappingGetSize(vl2g,&nv)); 380 PetscCall(PetscSFCreate(comm,&sfv)); 381 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g,&idxs)); 382 PetscCall(PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs)); 383 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs)); 384 i = singular ? 2 : 1; 385 PetscCall(PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots)); 386 387 /* Destroy temporary G created in MATIS format and modified G */ 388 PetscCall(PetscObjectReference((PetscObject)lG)); 389 PetscCall(MatDestroy(&lGis)); 390 PetscCall(MatDestroy(&G)); 391 392 if (print) { 393 PetscCall(PetscObjectSetName((PetscObject)lG,"initial_lG")); 394 PetscCall(MatView(lG,NULL)); 395 } 396 397 /* Save lG for values insertion in change of basis */ 398 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGinit)); 399 400 /* Analyze the edge-nodes connections (duplicate lG) */ 401 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGe)); 402 PetscCall(MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 403 PetscCall(PetscBTCreate(nv,&btv)); 404 PetscCall(PetscBTCreate(ne,&bte)); 405 PetscCall(PetscBTCreate(ne,&btb)); 406 PetscCall(PetscBTCreate(ne,&btbd)); 407 PetscCall(PetscBTCreate(nv,&btvcand)); 408 /* need to import the boundary specification to ensure the 409 proper detection of coarse edges' endpoints */ 410 if (pcbddc->DirichletBoundariesLocal) { 411 IS is; 412 413 if (fl2g) { 414 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is)); 415 } else { 416 is = pcbddc->DirichletBoundariesLocal; 417 } 418 PetscCall(ISGetLocalSize(is,&cum)); 419 PetscCall(ISGetIndices(is,&idxs)); 420 for (i=0;i<cum;i++) { 421 if (idxs[i] >= 0) { 422 PetscCall(PetscBTSet(btb,idxs[i])); 423 PetscCall(PetscBTSet(btbd,idxs[i])); 424 } 425 } 426 PetscCall(ISRestoreIndices(is,&idxs)); 427 if (fl2g) { 428 PetscCall(ISDestroy(&is)); 429 } 430 } 431 if (pcbddc->NeumannBoundariesLocal) { 432 IS is; 433 434 if (fl2g) { 435 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is)); 436 } else { 437 is = pcbddc->NeumannBoundariesLocal; 438 } 439 PetscCall(ISGetLocalSize(is,&cum)); 440 PetscCall(ISGetIndices(is,&idxs)); 441 for (i=0;i<cum;i++) { 442 if (idxs[i] >= 0) { 443 PetscCall(PetscBTSet(btb,idxs[i])); 444 } 445 } 446 PetscCall(ISRestoreIndices(is,&idxs)); 447 if (fl2g) { 448 PetscCall(ISDestroy(&is)); 449 } 450 } 451 452 /* Count neighs per dof */ 453 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs)); 454 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs)); 455 456 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 457 for proper detection of coarse edges' endpoints */ 458 PetscCall(PetscBTCreate(ne,&btee)); 459 for (i=0;i<ne;i++) { 460 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 461 PetscCall(PetscBTSet(btee,i)); 462 } 463 } 464 PetscCall(PetscMalloc1(ne,&marks)); 465 if (!conforming) { 466 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 467 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 468 } 469 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 470 PetscCall(MatSeqAIJGetArray(lGe,&vals)); 471 cum = 0; 472 for (i=0;i<ne;i++) { 473 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 474 if (!PetscBTLookup(btee,i)) { 475 marks[cum++] = i; 476 continue; 477 } 478 /* set badly connected edge dofs as primal */ 479 if (!conforming) { 480 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 481 marks[cum++] = i; 482 PetscCall(PetscBTSet(bte,i)); 483 for (j=ii[i];j<ii[i+1];j++) { 484 PetscCall(PetscBTSet(btv,jj[j])); 485 } 486 } else { 487 /* every edge dofs should be connected trough a certain number of nodal dofs 488 to other edge dofs belonging to coarse edges 489 - at most 2 endpoints 490 - order-1 interior nodal dofs 491 - no undefined nodal dofs (nconn < order) 492 */ 493 PetscInt ends = 0,ints = 0, undef = 0; 494 for (j=ii[i];j<ii[i+1];j++) { 495 PetscInt v = jj[j],k; 496 PetscInt nconn = iit[v+1]-iit[v]; 497 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 498 if (nconn > order) ends++; 499 else if (nconn == order) ints++; 500 else undef++; 501 } 502 if (undef || ends > 2 || ints != order -1) { 503 marks[cum++] = i; 504 PetscCall(PetscBTSet(bte,i)); 505 for (j=ii[i];j<ii[i+1];j++) { 506 PetscCall(PetscBTSet(btv,jj[j])); 507 } 508 } 509 } 510 } 511 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 512 if (!order && ii[i+1] != ii[i]) { 513 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 514 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 515 } 516 } 517 PetscCall(PetscBTDestroy(&btee)); 518 PetscCall(MatSeqAIJRestoreArray(lGe,&vals)); 519 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 520 if (!conforming) { 521 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 522 PetscCall(MatDestroy(&lGt)); 523 } 524 PetscCall(MatZeroRows(lGe,cum,marks,0.,NULL,NULL)); 525 526 /* identify splitpoints and corner candidates */ 527 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 528 if (print) { 529 PetscCall(PetscObjectSetName((PetscObject)lGe,"edgerestr_lG")); 530 PetscCall(MatView(lGe,NULL)); 531 PetscCall(PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt")); 532 PetscCall(MatView(lGt,NULL)); 533 } 534 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 535 PetscCall(MatSeqAIJGetArray(lGt,&vals)); 536 for (i=0;i<nv;i++) { 537 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 538 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 539 if (!order) { /* variable order */ 540 PetscReal vorder = 0.; 541 542 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 543 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 544 PetscCheck(vorder-test <= PETSC_SQRT_MACHINE_EPSILON,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",(double)vorder,test); 545 ord = 1; 546 } 547 PetscAssert(test%ord == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT,test,i,ord); 548 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 549 if (PetscBTLookup(btbd,jj[j])) { 550 bdir = PETSC_TRUE; 551 break; 552 } 553 if (vc != ecount[jj[j]]) { 554 sneighs = PETSC_FALSE; 555 } else { 556 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 557 for (k=0;k<vc;k++) { 558 if (vn[k] != en[k]) { 559 sneighs = PETSC_FALSE; 560 break; 561 } 562 } 563 } 564 } 565 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 566 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 567 PetscCall(PetscBTSet(btv,i)); 568 } else if (test == ord) { 569 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 570 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 571 PetscCall(PetscBTSet(btv,i)); 572 } else { 573 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 574 PetscCall(PetscBTSet(btvcand,i)); 575 } 576 } 577 } 578 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs)); 579 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs)); 580 PetscCall(PetscBTDestroy(&btbd)); 581 582 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 583 if (order != 1) { 584 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 585 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 586 for (i=0;i<nv;i++) { 587 if (PetscBTLookup(btvcand,i)) { 588 PetscBool found = PETSC_FALSE; 589 for (j=ii[i];j<ii[i+1] && !found;j++) { 590 PetscInt k,e = jj[j]; 591 if (PetscBTLookup(bte,e)) continue; 592 for (k=iit[e];k<iit[e+1];k++) { 593 PetscInt v = jjt[k]; 594 if (v != i && PetscBTLookup(btvcand,v)) { 595 found = PETSC_TRUE; 596 break; 597 } 598 } 599 } 600 if (!found) { 601 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 602 PetscCall(PetscBTClear(btvcand,i)); 603 } else { 604 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 605 } 606 } 607 } 608 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 609 } 610 PetscCall(MatSeqAIJRestoreArray(lGt,&vals)); 611 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 612 PetscCall(MatDestroy(&lGe)); 613 614 /* Get the local G^T explicitly */ 615 PetscCall(MatDestroy(&lGt)); 616 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 617 PetscCall(MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 618 619 /* Mark interior nodal dofs */ 620 PetscCall(ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 621 PetscCall(PetscBTCreate(nv,&btvi)); 622 for (i=1;i<n_neigh;i++) { 623 for (j=0;j<n_shared[i];j++) { 624 PetscCall(PetscBTSet(btvi,shared[i][j])); 625 } 626 } 627 PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 628 629 /* communicate corners and splitpoints */ 630 PetscCall(PetscMalloc1(nv,&vmarks)); 631 PetscCall(PetscArrayzero(sfvleaves,nv)); 632 PetscCall(PetscArrayzero(sfvroots,Lv)); 633 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 634 635 if (print) { 636 IS tbz; 637 638 cum = 0; 639 for (i=0;i<nv;i++) 640 if (sfvleaves[i]) 641 vmarks[cum++] = i; 642 643 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 644 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local")); 645 PetscCall(ISView(tbz,NULL)); 646 PetscCall(ISDestroy(&tbz)); 647 } 648 649 PetscCall(PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 650 PetscCall(PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 651 PetscCall(PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 652 PetscCall(PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 653 654 /* Zero rows of lGt corresponding to identified corners 655 and interior nodal dofs */ 656 cum = 0; 657 for (i=0;i<nv;i++) { 658 if (sfvleaves[i]) { 659 vmarks[cum++] = i; 660 PetscCall(PetscBTSet(btv,i)); 661 } 662 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 663 } 664 PetscCall(PetscBTDestroy(&btvi)); 665 if (print) { 666 IS tbz; 667 668 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 669 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior")); 670 PetscCall(ISView(tbz,NULL)); 671 PetscCall(ISDestroy(&tbz)); 672 } 673 PetscCall(MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL)); 674 PetscCall(PetscFree(vmarks)); 675 PetscCall(PetscSFDestroy(&sfv)); 676 PetscCall(PetscFree2(sfvleaves,sfvroots)); 677 678 /* Recompute G */ 679 PetscCall(MatDestroy(&lG)); 680 PetscCall(MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG)); 681 if (print) { 682 PetscCall(PetscObjectSetName((PetscObject)lG,"used_lG")); 683 PetscCall(MatView(lG,NULL)); 684 PetscCall(PetscObjectSetName((PetscObject)lGt,"used_lGt")); 685 PetscCall(MatView(lGt,NULL)); 686 } 687 688 /* Get primal dofs (if any) */ 689 cum = 0; 690 for (i=0;i<ne;i++) { 691 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 692 } 693 if (fl2g) { 694 PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,marks,marks)); 695 } 696 PetscCall(ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals)); 697 if (print) { 698 PetscCall(PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs")); 699 PetscCall(ISView(primals,NULL)); 700 } 701 PetscCall(PetscBTDestroy(&bte)); 702 /* TODO: what if the user passed in some of them ? */ 703 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 704 PetscCall(ISDestroy(&primals)); 705 706 /* Compute edge connectivity */ 707 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_")); 708 709 /* Symbolic conn = lG*lGt */ 710 PetscCall(MatProductCreate(lG,lGt,NULL,&conn)); 711 PetscCall(MatProductSetType(conn,MATPRODUCT_AB)); 712 PetscCall(MatProductSetAlgorithm(conn,"default")); 713 PetscCall(MatProductSetFill(conn,PETSC_DEFAULT)); 714 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_")); 715 PetscCall(MatProductSetFromOptions(conn)); 716 PetscCall(MatProductSymbolic(conn)); 717 718 PetscCall(MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 719 if (fl2g) { 720 PetscBT btf; 721 PetscInt *iia,*jja,*iiu,*jju; 722 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 723 724 /* create CSR for all local dofs */ 725 PetscCall(PetscMalloc1(n+1,&iia)); 726 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 727 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 728 iiu = pcbddc->mat_graph->xadj; 729 jju = pcbddc->mat_graph->adjncy; 730 } else if (pcbddc->use_local_adj) { 731 rest = PETSC_TRUE; 732 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 733 } else { 734 free = PETSC_TRUE; 735 PetscCall(PetscMalloc2(n+1,&iiu,n,&jju)); 736 iiu[0] = 0; 737 for (i=0;i<n;i++) { 738 iiu[i+1] = i+1; 739 jju[i] = -1; 740 } 741 } 742 743 /* import sizes of CSR */ 744 iia[0] = 0; 745 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 746 747 /* overwrite entries corresponding to the Nedelec field */ 748 PetscCall(PetscBTCreate(n,&btf)); 749 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 750 for (i=0;i<ne;i++) { 751 PetscCall(PetscBTSet(btf,idxs[i])); 752 iia[idxs[i]+1] = ii[i+1]-ii[i]; 753 } 754 755 /* iia in CSR */ 756 for (i=0;i<n;i++) iia[i+1] += iia[i]; 757 758 /* jja in CSR */ 759 PetscCall(PetscMalloc1(iia[n],&jja)); 760 for (i=0;i<n;i++) 761 if (!PetscBTLookup(btf,i)) 762 for (j=0;j<iiu[i+1]-iiu[i];j++) 763 jja[iia[i]+j] = jju[iiu[i]+j]; 764 765 /* map edge dofs connectivity */ 766 if (jj) { 767 PetscCall(ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj)); 768 for (i=0;i<ne;i++) { 769 PetscInt e = idxs[i]; 770 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 771 } 772 } 773 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 774 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER)); 775 if (rest) { 776 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 777 } 778 if (free) { 779 PetscCall(PetscFree2(iiu,jju)); 780 } 781 PetscCall(PetscBTDestroy(&btf)); 782 } else { 783 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER)); 784 } 785 786 /* Analyze interface for edge dofs */ 787 PetscCall(PCBDDCAnalyzeInterface(pc)); 788 pcbddc->mat_graph->twodim = PETSC_FALSE; 789 790 /* Get coarse edges in the edge space */ 791 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 792 PetscCall(MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 793 794 if (fl2g) { 795 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 796 PetscCall(PetscMalloc1(nee,&eedges)); 797 for (i=0;i<nee;i++) { 798 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 799 } 800 } else { 801 eedges = alleedges; 802 primals = allprimals; 803 } 804 805 /* Mark fine edge dofs with their coarse edge id */ 806 PetscCall(PetscArrayzero(marks,ne)); 807 PetscCall(ISGetLocalSize(primals,&cum)); 808 PetscCall(ISGetIndices(primals,&idxs)); 809 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 810 PetscCall(ISRestoreIndices(primals,&idxs)); 811 if (print) { 812 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs")); 813 PetscCall(ISView(primals,NULL)); 814 } 815 816 maxsize = 0; 817 for (i=0;i<nee;i++) { 818 PetscInt size,mark = i+1; 819 820 PetscCall(ISGetLocalSize(eedges[i],&size)); 821 PetscCall(ISGetIndices(eedges[i],&idxs)); 822 for (j=0;j<size;j++) marks[idxs[j]] = mark; 823 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 824 maxsize = PetscMax(maxsize,size); 825 } 826 827 /* Find coarse edge endpoints */ 828 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 829 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 830 for (i=0;i<nee;i++) { 831 PetscInt mark = i+1,size; 832 833 PetscCall(ISGetLocalSize(eedges[i],&size)); 834 if (!size && nedfieldlocal) continue; 835 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 836 PetscCall(ISGetIndices(eedges[i],&idxs)); 837 if (print) { 838 PetscCall(PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i)); 839 PetscCall(ISView(eedges[i],NULL)); 840 } 841 for (j=0;j<size;j++) { 842 PetscInt k, ee = idxs[j]; 843 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 844 for (k=ii[ee];k<ii[ee+1];k++) { 845 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 846 if (PetscBTLookup(btv,jj[k])) { 847 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 848 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 849 PetscInt k2; 850 PetscBool corner = PETSC_FALSE; 851 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 852 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 853 /* it's a corner if either is connected with an edge dof belonging to a different cc or 854 if the edge dof lie on the natural part of the boundary */ 855 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 856 corner = PETSC_TRUE; 857 break; 858 } 859 } 860 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 861 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 862 PetscCall(PetscBTSet(btv,jj[k])); 863 } else { 864 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 865 } 866 } 867 } 868 } 869 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 870 } 871 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 872 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 873 PetscCall(PetscBTDestroy(&btb)); 874 875 /* Reset marked primal dofs */ 876 PetscCall(ISGetLocalSize(primals,&cum)); 877 PetscCall(ISGetIndices(primals,&idxs)); 878 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 879 PetscCall(ISRestoreIndices(primals,&idxs)); 880 881 /* Now use the initial lG */ 882 PetscCall(MatDestroy(&lG)); 883 PetscCall(MatDestroy(&lGt)); 884 lG = lGinit; 885 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 886 887 /* Compute extended cols indices */ 888 PetscCall(PetscBTCreate(nv,&btvc)); 889 PetscCall(PetscBTCreate(nee,&bter)); 890 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 891 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG,&i)); 892 i *= maxsize; 893 PetscCall(PetscCalloc1(nee,&extcols)); 894 PetscCall(PetscMalloc2(i,&extrow,i,&gidxs)); 895 eerr = PETSC_FALSE; 896 for (i=0;i<nee;i++) { 897 PetscInt size,found = 0; 898 899 cum = 0; 900 PetscCall(ISGetLocalSize(eedges[i],&size)); 901 if (!size && nedfieldlocal) continue; 902 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 903 PetscCall(ISGetIndices(eedges[i],&idxs)); 904 PetscCall(PetscBTMemzero(nv,btvc)); 905 for (j=0;j<size;j++) { 906 PetscInt k,ee = idxs[j]; 907 for (k=ii[ee];k<ii[ee+1];k++) { 908 PetscInt vv = jj[k]; 909 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 910 else if (!PetscBTLookupSet(btvc,vv)) found++; 911 } 912 } 913 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 914 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 915 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 916 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 917 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 918 /* it may happen that endpoints are not defined at this point 919 if it is the case, mark this edge for a second pass */ 920 if (cum != size -1 || found != 2) { 921 PetscCall(PetscBTSet(bter,i)); 922 if (print) { 923 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge")); 924 PetscCall(ISView(eedges[i],NULL)); 925 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol")); 926 PetscCall(ISView(extcols[i],NULL)); 927 } 928 eerr = PETSC_TRUE; 929 } 930 } 931 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 932 PetscCall(MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm)); 933 if (done) { 934 PetscInt *newprimals; 935 936 PetscCall(PetscMalloc1(ne,&newprimals)); 937 PetscCall(ISGetLocalSize(primals,&cum)); 938 PetscCall(ISGetIndices(primals,&idxs)); 939 PetscCall(PetscArraycpy(newprimals,idxs,cum)); 940 PetscCall(ISRestoreIndices(primals,&idxs)); 941 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 942 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 943 for (i=0;i<nee;i++) { 944 PetscBool has_candidates = PETSC_FALSE; 945 if (PetscBTLookup(bter,i)) { 946 PetscInt size,mark = i+1; 947 948 PetscCall(ISGetLocalSize(eedges[i],&size)); 949 PetscCall(ISGetIndices(eedges[i],&idxs)); 950 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 951 for (j=0;j<size;j++) { 952 PetscInt k,ee = idxs[j]; 953 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 954 for (k=ii[ee];k<ii[ee+1];k++) { 955 /* set all candidates located on the edge as corners */ 956 if (PetscBTLookup(btvcand,jj[k])) { 957 PetscInt k2,vv = jj[k]; 958 has_candidates = PETSC_TRUE; 959 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 960 PetscCall(PetscBTSet(btv,vv)); 961 /* set all edge dofs connected to candidate as primals */ 962 for (k2=iit[vv];k2<iit[vv+1];k2++) { 963 if (marks[jjt[k2]] == mark) { 964 PetscInt k3,ee2 = jjt[k2]; 965 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 966 newprimals[cum++] = ee2; 967 /* finally set the new corners */ 968 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 969 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 970 PetscCall(PetscBTSet(btv,jj[k3])); 971 } 972 } 973 } 974 } else { 975 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 976 } 977 } 978 } 979 if (!has_candidates) { /* circular edge */ 980 PetscInt k, ee = idxs[0],*tmarks; 981 982 PetscCall(PetscCalloc1(ne,&tmarks)); 983 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 984 for (k=ii[ee];k<ii[ee+1];k++) { 985 PetscInt k2; 986 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 987 PetscCall(PetscBTSet(btv,jj[k])); 988 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 989 } 990 for (j=0;j<size;j++) { 991 if (tmarks[idxs[j]] > 1) { 992 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 993 newprimals[cum++] = idxs[j]; 994 } 995 } 996 PetscCall(PetscFree(tmarks)); 997 } 998 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 999 } 1000 PetscCall(ISDestroy(&extcols[i])); 1001 } 1002 PetscCall(PetscFree(extcols)); 1003 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 1004 PetscCall(PetscSortRemoveDupsInt(&cum,newprimals)); 1005 if (fl2g) { 1006 PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals)); 1007 PetscCall(ISDestroy(&primals)); 1008 for (i=0;i<nee;i++) { 1009 PetscCall(ISDestroy(&eedges[i])); 1010 } 1011 PetscCall(PetscFree(eedges)); 1012 } 1013 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1014 PetscCall(ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals)); 1015 PetscCall(PetscFree(newprimals)); 1016 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 1017 PetscCall(ISDestroy(&primals)); 1018 PetscCall(PCBDDCAnalyzeInterface(pc)); 1019 pcbddc->mat_graph->twodim = PETSC_FALSE; 1020 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1021 if (fl2g) { 1022 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 1023 PetscCall(PetscMalloc1(nee,&eedges)); 1024 for (i=0;i<nee;i++) { 1025 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 1026 } 1027 } else { 1028 eedges = alleedges; 1029 primals = allprimals; 1030 } 1031 PetscCall(PetscCalloc1(nee,&extcols)); 1032 1033 /* Mark again */ 1034 PetscCall(PetscArrayzero(marks,ne)); 1035 for (i=0;i<nee;i++) { 1036 PetscInt size,mark = i+1; 1037 1038 PetscCall(ISGetLocalSize(eedges[i],&size)); 1039 PetscCall(ISGetIndices(eedges[i],&idxs)); 1040 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1041 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1042 } 1043 if (print) { 1044 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass")); 1045 PetscCall(ISView(primals,NULL)); 1046 } 1047 1048 /* Recompute extended cols */ 1049 eerr = PETSC_FALSE; 1050 for (i=0;i<nee;i++) { 1051 PetscInt size; 1052 1053 cum = 0; 1054 PetscCall(ISGetLocalSize(eedges[i],&size)); 1055 if (!size && nedfieldlocal) continue; 1056 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1057 PetscCall(ISGetIndices(eedges[i],&idxs)); 1058 for (j=0;j<size;j++) { 1059 PetscInt k,ee = idxs[j]; 1060 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1061 } 1062 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1063 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 1064 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 1065 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 1066 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 1067 if (cum != size -1) { 1068 if (print) { 1069 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass")); 1070 PetscCall(ISView(eedges[i],NULL)); 1071 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass")); 1072 PetscCall(ISView(extcols[i],NULL)); 1073 } 1074 eerr = PETSC_TRUE; 1075 } 1076 } 1077 } 1078 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1079 PetscCall(PetscFree2(extrow,gidxs)); 1080 PetscCall(PetscBTDestroy(&bter)); 1081 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF)); 1082 /* an error should not occur at this point */ 1083 PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1084 1085 /* Check the number of endpoints */ 1086 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1087 PetscCall(PetscMalloc1(2*nee,&corners)); 1088 PetscCall(PetscMalloc1(nee,&cedges)); 1089 for (i=0;i<nee;i++) { 1090 PetscInt size, found = 0, gc[2]; 1091 1092 /* init with defaults */ 1093 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1094 PetscCall(ISGetLocalSize(eedges[i],&size)); 1095 if (!size && nedfieldlocal) continue; 1096 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1097 PetscCall(ISGetIndices(eedges[i],&idxs)); 1098 PetscCall(PetscBTMemzero(nv,btvc)); 1099 for (j=0;j<size;j++) { 1100 PetscInt k,ee = idxs[j]; 1101 for (k=ii[ee];k<ii[ee+1];k++) { 1102 PetscInt vv = jj[k]; 1103 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1104 PetscCheck(found != 2,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1105 corners[i*2+found++] = vv; 1106 } 1107 } 1108 } 1109 if (found != 2) { 1110 PetscInt e; 1111 if (fl2g) { 1112 PetscCall(ISLocalToGlobalMappingApply(fl2g,1,idxs,&e)); 1113 } else { 1114 e = idxs[0]; 1115 } 1116 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1117 } 1118 1119 /* get primal dof index on this coarse edge */ 1120 PetscCall(ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc)); 1121 if (gc[0] > gc[1]) { 1122 PetscInt swap = corners[2*i]; 1123 corners[2*i] = corners[2*i+1]; 1124 corners[2*i+1] = swap; 1125 } 1126 cedges[i] = idxs[size-1]; 1127 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1128 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1129 } 1130 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1131 PetscCall(PetscBTDestroy(&btvc)); 1132 1133 if (PetscDefined(USE_DEBUG)) { 1134 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1135 not interfere with neighbouring coarse edges */ 1136 PetscCall(PetscMalloc1(nee+1,&emarks)); 1137 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1138 for (i=0;i<nv;i++) { 1139 PetscInt emax = 0,eemax = 0; 1140 1141 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1142 PetscCall(PetscArrayzero(emarks,nee+1)); 1143 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1144 for (j=1;j<nee+1;j++) { 1145 if (emax < emarks[j]) { 1146 emax = emarks[j]; 1147 eemax = j; 1148 } 1149 } 1150 /* not relevant for edges */ 1151 if (!eemax) continue; 1152 1153 for (j=ii[i];j<ii[i+1];j++) { 1154 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1155 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1156 } 1157 } 1158 } 1159 PetscCall(PetscFree(emarks)); 1160 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1161 } 1162 1163 /* Compute extended rows indices for edge blocks of the change of basis */ 1164 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1165 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt,&extmem)); 1166 extmem *= maxsize; 1167 PetscCall(PetscMalloc1(extmem*nee,&extrow)); 1168 PetscCall(PetscMalloc1(nee,&extrows)); 1169 PetscCall(PetscCalloc1(nee,&extrowcum)); 1170 for (i=0;i<nv;i++) { 1171 PetscInt mark = 0,size,start; 1172 1173 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1174 for (j=ii[i];j<ii[i+1];j++) 1175 if (marks[jj[j]] && !mark) 1176 mark = marks[jj[j]]; 1177 1178 /* not relevant */ 1179 if (!mark) continue; 1180 1181 /* import extended row */ 1182 mark--; 1183 start = mark*extmem+extrowcum[mark]; 1184 size = ii[i+1]-ii[i]; 1185 PetscCheck(extrowcum[mark] + size <= extmem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1186 PetscCall(PetscArraycpy(extrow+start,jj+ii[i],size)); 1187 extrowcum[mark] += size; 1188 } 1189 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1190 PetscCall(MatDestroy(&lGt)); 1191 PetscCall(PetscFree(marks)); 1192 1193 /* Compress extrows */ 1194 cum = 0; 1195 for (i=0;i<nee;i++) { 1196 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1197 PetscCall(PetscSortRemoveDupsInt(&size,start)); 1198 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i])); 1199 cum = PetscMax(cum,size); 1200 } 1201 PetscCall(PetscFree(extrowcum)); 1202 PetscCall(PetscBTDestroy(&btv)); 1203 PetscCall(PetscBTDestroy(&btvcand)); 1204 1205 /* Workspace for lapack inner calls and VecSetValues */ 1206 PetscCall(PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork)); 1207 1208 /* Create change of basis matrix (preallocation can be improved) */ 1209 PetscCall(MatCreate(comm,&T)); 1210 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1211 pc->pmat->rmap->N,pc->pmat->rmap->N);PetscCall(ierr); 1212 PetscCall(MatSetType(T,MATAIJ)); 1213 PetscCall(MatSeqAIJSetPreallocation(T,10,NULL)); 1214 PetscCall(MatMPIAIJSetPreallocation(T,10,NULL,10,NULL)); 1215 PetscCall(MatSetLocalToGlobalMapping(T,al2g,al2g)); 1216 PetscCall(MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 1217 PetscCall(MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE)); 1218 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1219 1220 /* Defaults to identity */ 1221 PetscCall(MatCreateVecs(pc->pmat,&tvec,NULL)); 1222 PetscCall(VecSet(tvec,1.0)); 1223 PetscCall(MatDiagonalSet(T,tvec,INSERT_VALUES)); 1224 PetscCall(VecDestroy(&tvec)); 1225 1226 /* Create discrete gradient for the coarser level if needed */ 1227 PetscCall(MatDestroy(&pcbddc->nedcG)); 1228 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1229 if (pcbddc->current_level < pcbddc->max_levels) { 1230 ISLocalToGlobalMapping cel2g,cvl2g; 1231 IS wis,gwis; 1232 PetscInt cnv,cne; 1233 1234 PetscCall(ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis)); 1235 if (fl2g) { 1236 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal)); 1237 } else { 1238 PetscCall(PetscObjectReference((PetscObject)wis)); 1239 pcbddc->nedclocal = wis; 1240 } 1241 PetscCall(ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis)); 1242 PetscCall(ISDestroy(&wis)); 1243 PetscCall(ISRenumber(gwis,NULL,&cne,&wis)); 1244 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cel2g)); 1245 PetscCall(ISDestroy(&wis)); 1246 PetscCall(ISDestroy(&gwis)); 1247 1248 PetscCall(ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis)); 1249 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis)); 1250 PetscCall(ISDestroy(&wis)); 1251 PetscCall(ISRenumber(gwis,NULL,&cnv,&wis)); 1252 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cvl2g)); 1253 PetscCall(ISDestroy(&wis)); 1254 PetscCall(ISDestroy(&gwis)); 1255 1256 PetscCall(MatCreate(comm,&pcbddc->nedcG)); 1257 PetscCall(MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv)); 1258 PetscCall(MatSetType(pcbddc->nedcG,MATAIJ)); 1259 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL)); 1260 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL)); 1261 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g)); 1262 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1263 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1264 } 1265 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1266 1267 #if defined(PRINT_GDET) 1268 inc = 0; 1269 lev = pcbddc->current_level; 1270 #endif 1271 1272 /* Insert values in the change of basis matrix */ 1273 for (i=0;i<nee;i++) { 1274 Mat Gins = NULL, GKins = NULL; 1275 IS cornersis = NULL; 1276 PetscScalar cvals[2]; 1277 1278 if (pcbddc->nedcG) { 1279 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis)); 1280 } 1281 PetscCall(PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork)); 1282 if (Gins && GKins) { 1283 const PetscScalar *data; 1284 const PetscInt *rows,*cols; 1285 PetscInt nrh,nch,nrc,ncc; 1286 1287 PetscCall(ISGetIndices(eedges[i],&cols)); 1288 /* H1 */ 1289 PetscCall(ISGetIndices(extrows[i],&rows)); 1290 PetscCall(MatGetSize(Gins,&nrh,&nch)); 1291 PetscCall(MatDenseGetArrayRead(Gins,&data)); 1292 PetscCall(MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES)); 1293 PetscCall(MatDenseRestoreArrayRead(Gins,&data)); 1294 PetscCall(ISRestoreIndices(extrows[i],&rows)); 1295 /* complement */ 1296 PetscCall(MatGetSize(GKins,&nrc,&ncc)); 1297 PetscCheck(ncc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1298 PetscCheck(ncc + nch == nrc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1299 PetscCheck(ncc == 1 || !pcbddc->nedcG,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1300 PetscCall(MatDenseGetArrayRead(GKins,&data)); 1301 PetscCall(MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES)); 1302 PetscCall(MatDenseRestoreArrayRead(GKins,&data)); 1303 1304 /* coarse discrete gradient */ 1305 if (pcbddc->nedcG) { 1306 PetscInt cols[2]; 1307 1308 cols[0] = 2*i; 1309 cols[1] = 2*i+1; 1310 PetscCall(MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES)); 1311 } 1312 PetscCall(ISRestoreIndices(eedges[i],&cols)); 1313 } 1314 PetscCall(ISDestroy(&extrows[i])); 1315 PetscCall(ISDestroy(&extcols[i])); 1316 PetscCall(ISDestroy(&cornersis)); 1317 PetscCall(MatDestroy(&Gins)); 1318 PetscCall(MatDestroy(&GKins)); 1319 } 1320 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1321 1322 /* Start assembling */ 1323 PetscCall(MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY)); 1324 if (pcbddc->nedcG) { 1325 PetscCall(MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1326 } 1327 1328 /* Free */ 1329 if (fl2g) { 1330 PetscCall(ISDestroy(&primals)); 1331 for (i=0;i<nee;i++) { 1332 PetscCall(ISDestroy(&eedges[i])); 1333 } 1334 PetscCall(PetscFree(eedges)); 1335 } 1336 1337 /* hack mat_graph with primal dofs on the coarse edges */ 1338 { 1339 PCBDDCGraph graph = pcbddc->mat_graph; 1340 PetscInt *oqueue = graph->queue; 1341 PetscInt *ocptr = graph->cptr; 1342 PetscInt ncc,*idxs; 1343 1344 /* find first primal edge */ 1345 if (pcbddc->nedclocal) { 1346 PetscCall(ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1347 } else { 1348 if (fl2g) { 1349 PetscCall(ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges)); 1350 } 1351 idxs = cedges; 1352 } 1353 cum = 0; 1354 while (cum < nee && cedges[cum] < 0) cum++; 1355 1356 /* adapt connected components */ 1357 PetscCall(PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue)); 1358 graph->cptr[0] = 0; 1359 for (i=0,ncc=0;i<graph->ncc;i++) { 1360 PetscInt lc = ocptr[i+1]-ocptr[i]; 1361 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1362 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1363 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1364 ncc++; 1365 lc--; 1366 cum++; 1367 while (cum < nee && cedges[cum] < 0) cum++; 1368 } 1369 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1370 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1371 ncc++; 1372 } 1373 graph->ncc = ncc; 1374 if (pcbddc->nedclocal) { 1375 PetscCall(ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1376 } 1377 PetscCall(PetscFree2(ocptr,oqueue)); 1378 } 1379 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1380 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1381 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1382 PetscCall(MatDestroy(&conn)); 1383 1384 PetscCall(ISDestroy(&nedfieldlocal)); 1385 PetscCall(PetscFree(extrow)); 1386 PetscCall(PetscFree2(work,rwork)); 1387 PetscCall(PetscFree(corners)); 1388 PetscCall(PetscFree(cedges)); 1389 PetscCall(PetscFree(extrows)); 1390 PetscCall(PetscFree(extcols)); 1391 PetscCall(MatDestroy(&lG)); 1392 1393 /* Complete assembling */ 1394 PetscCall(MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY)); 1395 if (pcbddc->nedcG) { 1396 PetscCall(MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1397 #if 0 1398 PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1399 PetscCall(MatView(pcbddc->nedcG,NULL)); 1400 #endif 1401 } 1402 1403 /* set change of basis */ 1404 PetscCall(PCBDDCSetChangeOfBasisMat(pc,T,singular)); 1405 PetscCall(MatDestroy(&T)); 1406 1407 PetscFunctionReturn(0); 1408 } 1409 1410 /* the near-null space of BDDC carries information on quadrature weights, 1411 and these can be collinear -> so cheat with MatNullSpaceCreate 1412 and create a suitable set of basis vectors first */ 1413 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1414 { 1415 PetscInt i; 1416 1417 PetscFunctionBegin; 1418 for (i=0;i<nvecs;i++) { 1419 PetscInt first,last; 1420 1421 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1422 PetscCheck(last-first >= 2*nvecs || !has_const,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1423 if (i>=first && i < last) { 1424 PetscScalar *data; 1425 PetscCall(VecGetArray(quad_vecs[i],&data)); 1426 if (!has_const) { 1427 data[i-first] = 1.; 1428 } else { 1429 data[2*i-first] = 1./PetscSqrtReal(2.); 1430 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1431 } 1432 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1433 } 1434 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1435 } 1436 PetscCall(MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp)); 1437 for (i=0;i<nvecs;i++) { /* reset vectors */ 1438 PetscInt first,last; 1439 PetscCall(VecLockReadPop(quad_vecs[i])); 1440 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1441 if (i>=first && i < last) { 1442 PetscScalar *data; 1443 PetscCall(VecGetArray(quad_vecs[i],&data)); 1444 if (!has_const) { 1445 data[i-first] = 0.; 1446 } else { 1447 data[2*i-first] = 0.; 1448 data[2*i-first+1] = 0.; 1449 } 1450 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1451 } 1452 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1453 PetscCall(VecLockReadPush(quad_vecs[i])); 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1459 { 1460 Mat loc_divudotp; 1461 Vec p,v,vins,quad_vec,*quad_vecs; 1462 ISLocalToGlobalMapping map; 1463 PetscScalar *vals; 1464 const PetscScalar *array; 1465 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1466 PetscInt n_neigh,*neigh,*n_shared,**shared; 1467 PetscMPIInt rank; 1468 1469 PetscFunctionBegin; 1470 PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1471 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1472 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A))); 1473 if (!maxneighs) { 1474 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1475 *nnsp = NULL; 1476 PetscFunctionReturn(0); 1477 } 1478 maxsize = 0; 1479 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1480 PetscCall(PetscMalloc2(maxsize,&gidxs,maxsize,&vals)); 1481 /* create vectors to hold quadrature weights */ 1482 PetscCall(MatCreateVecs(A,&quad_vec,NULL)); 1483 if (!transpose) { 1484 PetscCall(MatISGetLocalToGlobalMapping(A,&map,NULL)); 1485 } else { 1486 PetscCall(MatISGetLocalToGlobalMapping(A,NULL,&map)); 1487 } 1488 PetscCall(VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs)); 1489 PetscCall(VecDestroy(&quad_vec)); 1490 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp)); 1491 for (i=0;i<maxneighs;i++) { 1492 PetscCall(VecLockReadPop(quad_vecs[i])); 1493 } 1494 1495 /* compute local quad vec */ 1496 PetscCall(MatISGetLocalMat(divudotp,&loc_divudotp)); 1497 if (!transpose) { 1498 PetscCall(MatCreateVecs(loc_divudotp,&v,&p)); 1499 } else { 1500 PetscCall(MatCreateVecs(loc_divudotp,&p,&v)); 1501 } 1502 PetscCall(VecSet(p,1.)); 1503 if (!transpose) { 1504 PetscCall(MatMultTranspose(loc_divudotp,p,v)); 1505 } else { 1506 PetscCall(MatMult(loc_divudotp,p,v)); 1507 } 1508 if (vl2l) { 1509 Mat lA; 1510 VecScatter sc; 1511 1512 PetscCall(MatISGetLocalMat(A,&lA)); 1513 PetscCall(MatCreateVecs(lA,&vins,NULL)); 1514 PetscCall(VecScatterCreate(v,NULL,vins,vl2l,&sc)); 1515 PetscCall(VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1516 PetscCall(VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1517 PetscCall(VecScatterDestroy(&sc)); 1518 } else { 1519 vins = v; 1520 } 1521 PetscCall(VecGetArrayRead(vins,&array)); 1522 PetscCall(VecDestroy(&p)); 1523 1524 /* insert in global quadrature vecs */ 1525 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 1526 for (i=1;i<n_neigh;i++) { 1527 const PetscInt *idxs; 1528 PetscInt idx,nn,j; 1529 1530 idxs = shared[i]; 1531 nn = n_shared[i]; 1532 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1533 PetscCall(PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx)); 1534 idx = -(idx+1); 1535 PetscCheck(idx >= 0 && idx < maxneighs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %D not in [0,%D)",idx,maxneighs); 1536 PetscCall(ISLocalToGlobalMappingApply(map,nn,idxs,gidxs)); 1537 PetscCall(VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES)); 1538 } 1539 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1540 PetscCall(VecRestoreArrayRead(vins,&array)); 1541 if (vl2l) { 1542 PetscCall(VecDestroy(&vins)); 1543 } 1544 PetscCall(VecDestroy(&v)); 1545 PetscCall(PetscFree2(gidxs,vals)); 1546 1547 /* assemble near null space */ 1548 for (i=0;i<maxneighs;i++) { 1549 PetscCall(VecAssemblyBegin(quad_vecs[i])); 1550 } 1551 for (i=0;i<maxneighs;i++) { 1552 PetscCall(VecAssemblyEnd(quad_vecs[i])); 1553 PetscCall(VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view")); 1554 PetscCall(VecLockReadPush(quad_vecs[i])); 1555 } 1556 PetscCall(VecDestroyVecs(maxneighs,&quad_vecs)); 1557 PetscFunctionReturn(0); 1558 } 1559 1560 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1561 { 1562 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1563 1564 PetscFunctionBegin; 1565 if (primalv) { 1566 if (pcbddc->user_primal_vertices_local) { 1567 IS list[2], newp; 1568 1569 list[0] = primalv; 1570 list[1] = pcbddc->user_primal_vertices_local; 1571 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp)); 1572 PetscCall(ISSortRemoveDups(newp)); 1573 PetscCall(ISDestroy(&list[1])); 1574 pcbddc->user_primal_vertices_local = newp; 1575 } else { 1576 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primalv)); 1577 } 1578 } 1579 PetscFunctionReturn(0); 1580 } 1581 1582 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1583 { 1584 PetscInt f, *comp = (PetscInt *)ctx; 1585 1586 PetscFunctionBegin; 1587 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1588 PetscFunctionReturn(0); 1589 } 1590 1591 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1592 { 1593 PetscErrorCode ierr; 1594 Vec local,global; 1595 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1596 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1597 PetscBool monolithic = PETSC_FALSE; 1598 1599 PetscFunctionBegin; 1600 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");PetscCall(ierr); 1601 PetscCall(PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL)); 1602 ierr = PetscOptionsEnd();PetscCall(ierr); 1603 /* need to convert from global to local topology information and remove references to information in global ordering */ 1604 PetscCall(MatCreateVecs(pc->pmat,&global,NULL)); 1605 PetscCall(MatCreateVecs(matis->A,&local,NULL)); 1606 PetscCall(VecBindToCPU(global,PETSC_TRUE)); 1607 PetscCall(VecBindToCPU(local,PETSC_TRUE)); 1608 if (monolithic) { /* just get block size to properly compute vertices */ 1609 if (pcbddc->vertex_size == 1) { 1610 PetscCall(MatGetBlockSize(pc->pmat,&pcbddc->vertex_size)); 1611 } 1612 goto boundary; 1613 } 1614 1615 if (pcbddc->user_provided_isfordofs) { 1616 if (pcbddc->n_ISForDofs) { 1617 PetscInt i; 1618 1619 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal)); 1620 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1621 PetscInt bs; 1622 1623 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i])); 1624 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i],&bs)); 1625 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1626 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1627 } 1628 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1629 pcbddc->n_ISForDofs = 0; 1630 PetscCall(PetscFree(pcbddc->ISForDofs)); 1631 } 1632 } else { 1633 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1634 DM dm; 1635 1636 PetscCall(MatGetDM(pc->pmat, &dm)); 1637 if (!dm) { 1638 PetscCall(PCGetDM(pc, &dm)); 1639 } 1640 if (dm) { 1641 IS *fields; 1642 PetscInt nf,i; 1643 1644 PetscCall(DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL)); 1645 PetscCall(PetscMalloc1(nf,&pcbddc->ISForDofsLocal)); 1646 for (i=0;i<nf;i++) { 1647 PetscInt bs; 1648 1649 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i])); 1650 PetscCall(ISGetBlockSize(fields[i],&bs)); 1651 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1652 PetscCall(ISDestroy(&fields[i])); 1653 } 1654 PetscCall(PetscFree(fields)); 1655 pcbddc->n_ISForDofsLocal = nf; 1656 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1657 PetscContainer c; 1658 1659 PetscCall(PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c)); 1660 if (c) { 1661 MatISLocalFields lf; 1662 PetscCall(PetscContainerGetPointer(c,(void**)&lf)); 1663 PetscCall(PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf)); 1664 } else { /* fallback, create the default fields if bs > 1 */ 1665 PetscInt i, n = matis->A->rmap->n; 1666 PetscCall(MatGetBlockSize(pc->pmat,&i)); 1667 if (i > 1) { 1668 pcbddc->n_ISForDofsLocal = i; 1669 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal)); 1670 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1671 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i])); 1672 } 1673 } 1674 } 1675 } 1676 } else { 1677 PetscInt i; 1678 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1679 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i])); 1680 } 1681 } 1682 } 1683 1684 boundary: 1685 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1686 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal)); 1687 } else if (pcbddc->DirichletBoundariesLocal) { 1688 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal)); 1689 } 1690 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1691 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal)); 1692 } else if (pcbddc->NeumannBoundariesLocal) { 1693 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal)); 1694 } 1695 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1696 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local)); 1697 } 1698 PetscCall(VecDestroy(&global)); 1699 PetscCall(VecDestroy(&local)); 1700 /* detect local disconnected subdomains if requested (use matis->A) */ 1701 if (pcbddc->detect_disconnected) { 1702 IS primalv = NULL; 1703 PetscInt i; 1704 PetscBool filter = pcbddc->detect_disconnected_filter; 1705 1706 for (i=0;i<pcbddc->n_local_subs;i++) { 1707 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1708 } 1709 PetscCall(PetscFree(pcbddc->local_subs)); 1710 PetscCall(PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv)); 1711 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,primalv)); 1712 PetscCall(ISDestroy(&primalv)); 1713 } 1714 /* early stage corner detection */ 1715 { 1716 DM dm; 1717 1718 PetscCall(MatGetDM(pc->pmat,&dm)); 1719 if (!dm) { 1720 PetscCall(PCGetDM(pc,&dm)); 1721 } 1722 if (dm) { 1723 PetscBool isda; 1724 1725 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda)); 1726 if (isda) { 1727 ISLocalToGlobalMapping l2l; 1728 IS corners; 1729 Mat lA; 1730 PetscBool gl,lo; 1731 1732 { 1733 Vec cvec; 1734 const PetscScalar *coords; 1735 PetscInt dof,n,cdim; 1736 PetscBool memc = PETSC_TRUE; 1737 1738 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1739 PetscCall(DMGetCoordinates(dm,&cvec)); 1740 PetscCall(VecGetLocalSize(cvec,&n)); 1741 PetscCall(VecGetBlockSize(cvec,&cdim)); 1742 n /= cdim; 1743 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1744 PetscCall(PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords)); 1745 PetscCall(VecGetArrayRead(cvec,&coords)); 1746 #if defined(PETSC_USE_COMPLEX) 1747 memc = PETSC_FALSE; 1748 #endif 1749 if (dof != 1) memc = PETSC_FALSE; 1750 if (memc) { 1751 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof)); 1752 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1753 PetscReal *bcoords = pcbddc->mat_graph->coords; 1754 PetscInt i, b, d; 1755 1756 for (i=0;i<n;i++) { 1757 for (b=0;b<dof;b++) { 1758 for (d=0;d<cdim;d++) { 1759 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1760 } 1761 } 1762 } 1763 } 1764 PetscCall(VecRestoreArrayRead(cvec,&coords)); 1765 pcbddc->mat_graph->cdim = cdim; 1766 pcbddc->mat_graph->cnloc = dof*n; 1767 pcbddc->mat_graph->cloc = PETSC_FALSE; 1768 } 1769 PetscCall(DMDAGetSubdomainCornersIS(dm,&corners)); 1770 PetscCall(MatISGetLocalMat(pc->pmat,&lA)); 1771 PetscCall(MatGetLocalToGlobalMapping(lA,&l2l,NULL)); 1772 PetscCall(MatISRestoreLocalMat(pc->pmat,&lA)); 1773 lo = (PetscBool)(l2l && corners); 1774 PetscCall(MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 1775 if (gl) { /* From PETSc's DMDA */ 1776 const PetscInt *idx; 1777 PetscInt dof,bs,*idxout,n; 1778 1779 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1780 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l,&bs)); 1781 PetscCall(ISGetLocalSize(corners,&n)); 1782 PetscCall(ISGetIndices(corners,&idx)); 1783 if (bs == dof) { 1784 PetscCall(PetscMalloc1(n,&idxout)); 1785 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout)); 1786 } else { /* the original DMDA local-to-local map have been modified */ 1787 PetscInt i,d; 1788 1789 PetscCall(PetscMalloc1(dof*n,&idxout)); 1790 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1791 PetscCall(ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout)); 1792 1793 bs = 1; 1794 n *= dof; 1795 } 1796 PetscCall(ISRestoreIndices(corners,&idx)); 1797 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1798 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners)); 1799 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,corners)); 1800 PetscCall(ISDestroy(&corners)); 1801 pcbddc->corner_selected = PETSC_TRUE; 1802 pcbddc->corner_selection = PETSC_TRUE; 1803 } 1804 if (corners) { 1805 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1806 } 1807 } 1808 } 1809 } 1810 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1811 DM dm; 1812 1813 PetscCall(MatGetDM(pc->pmat,&dm)); 1814 if (!dm) { 1815 PetscCall(PCGetDM(pc,&dm)); 1816 } 1817 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1818 Vec vcoords; 1819 PetscSection section; 1820 PetscReal *coords; 1821 PetscInt d,cdim,nl,nf,**ctxs; 1822 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1823 /* debug coordinates */ 1824 PetscViewer viewer; 1825 PetscBool flg; 1826 PetscViewerFormat format; 1827 const char *prefix; 1828 1829 PetscCall(DMGetCoordinateDim(dm,&cdim)); 1830 PetscCall(DMGetLocalSection(dm,§ion)); 1831 PetscCall(PetscSectionGetNumFields(section,&nf)); 1832 PetscCall(DMCreateGlobalVector(dm,&vcoords)); 1833 PetscCall(VecGetLocalSize(vcoords,&nl)); 1834 PetscCall(PetscMalloc1(nl*cdim,&coords)); 1835 PetscCall(PetscMalloc2(nf,&funcs,nf,&ctxs)); 1836 PetscCall(PetscMalloc1(nf,&ctxs[0])); 1837 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1838 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1839 1840 /* debug coordinates */ 1841 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 1842 PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords),((PetscObject)vcoords)->options,prefix,"-pc_bddc_coords_vec_view",&viewer,&format,&flg)); 1843 if (flg) PetscCall(PetscViewerPushFormat(viewer,format)); 1844 for (d=0;d<cdim;d++) { 1845 PetscInt i; 1846 const PetscScalar *v; 1847 char name[16]; 1848 1849 for (i=0;i<nf;i++) ctxs[i][0] = d; 1850 PetscCall(PetscSNPrintf(name,sizeof(name),"bddc_coords_%d",(int)d)); 1851 PetscCall(PetscObjectSetName((PetscObject)vcoords,name)); 1852 PetscCall(DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords)); 1853 if (flg) PetscCall(VecView(vcoords,viewer)); 1854 PetscCall(VecGetArrayRead(vcoords,&v)); 1855 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1856 PetscCall(VecRestoreArrayRead(vcoords,&v)); 1857 } 1858 PetscCall(VecDestroy(&vcoords)); 1859 PetscCall(PCSetCoordinates(pc,cdim,nl,coords)); 1860 PetscCall(PetscFree(coords)); 1861 PetscCall(PetscFree(ctxs[0])); 1862 PetscCall(PetscFree2(funcs,ctxs)); 1863 if (flg) { 1864 PetscCall(PetscViewerPopFormat(viewer)); 1865 PetscCall(PetscViewerDestroy(&viewer)); 1866 } 1867 } 1868 } 1869 PetscFunctionReturn(0); 1870 } 1871 1872 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1873 { 1874 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1875 IS nis; 1876 const PetscInt *idxs; 1877 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1878 1879 PetscFunctionBegin; 1880 PetscCheck(mop == MPI_LAND || mop == MPI_LOR,PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1881 if (mop == MPI_LAND) { 1882 /* init rootdata with true */ 1883 for (i=0;i<pc->pmat->rmap->n;i++) matis->sf_rootdata[i] = 1; 1884 } else { 1885 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 1886 } 1887 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 1888 PetscCall(ISGetLocalSize(*is,&nd)); 1889 PetscCall(ISGetIndices(*is,&idxs)); 1890 for (i=0;i<nd;i++) 1891 if (-1 < idxs[i] && idxs[i] < n) 1892 matis->sf_leafdata[idxs[i]] = 1; 1893 PetscCall(ISRestoreIndices(*is,&idxs)); 1894 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1895 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1896 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1897 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1898 if (mop == MPI_LAND) { 1899 PetscCall(PetscMalloc1(nd,&nidxs)); 1900 } else { 1901 PetscCall(PetscMalloc1(n,&nidxs)); 1902 } 1903 for (i=0,nnd=0;i<n;i++) 1904 if (matis->sf_leafdata[i]) 1905 nidxs[nnd++] = i; 1906 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis)); 1907 PetscCall(ISDestroy(is)); 1908 *is = nis; 1909 PetscFunctionReturn(0); 1910 } 1911 1912 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1913 { 1914 PC_IS *pcis = (PC_IS*)(pc->data); 1915 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1916 1917 PetscFunctionBegin; 1918 if (!pcbddc->benign_have_null) { 1919 PetscFunctionReturn(0); 1920 } 1921 if (pcbddc->ChangeOfBasisMatrix) { 1922 Vec swap; 1923 1924 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change)); 1925 swap = pcbddc->work_change; 1926 pcbddc->work_change = r; 1927 r = swap; 1928 } 1929 PetscCall(VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1930 PetscCall(VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1931 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1932 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D)); 1933 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1934 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 1935 PetscCall(VecSet(z,0.)); 1936 PetscCall(VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1937 PetscCall(VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1938 if (pcbddc->ChangeOfBasisMatrix) { 1939 pcbddc->work_change = r; 1940 PetscCall(VecCopy(z,pcbddc->work_change)); 1941 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z)); 1942 } 1943 PetscFunctionReturn(0); 1944 } 1945 1946 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1947 { 1948 PCBDDCBenignMatMult_ctx ctx; 1949 PetscBool apply_right,apply_left,reset_x; 1950 1951 PetscFunctionBegin; 1952 PetscCall(MatShellGetContext(A,&ctx)); 1953 if (transpose) { 1954 apply_right = ctx->apply_left; 1955 apply_left = ctx->apply_right; 1956 } else { 1957 apply_right = ctx->apply_right; 1958 apply_left = ctx->apply_left; 1959 } 1960 reset_x = PETSC_FALSE; 1961 if (apply_right) { 1962 const PetscScalar *ax; 1963 PetscInt nl,i; 1964 1965 PetscCall(VecGetLocalSize(x,&nl)); 1966 PetscCall(VecGetArrayRead(x,&ax)); 1967 PetscCall(PetscArraycpy(ctx->work,ax,nl)); 1968 PetscCall(VecRestoreArrayRead(x,&ax)); 1969 for (i=0;i<ctx->benign_n;i++) { 1970 PetscScalar sum,val; 1971 const PetscInt *idxs; 1972 PetscInt nz,j; 1973 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 1974 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1975 sum = 0.; 1976 if (ctx->apply_p0) { 1977 val = ctx->work[idxs[nz-1]]; 1978 for (j=0;j<nz-1;j++) { 1979 sum += ctx->work[idxs[j]]; 1980 ctx->work[idxs[j]] += val; 1981 } 1982 } else { 1983 for (j=0;j<nz-1;j++) { 1984 sum += ctx->work[idxs[j]]; 1985 } 1986 } 1987 ctx->work[idxs[nz-1]] -= sum; 1988 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1989 } 1990 PetscCall(VecPlaceArray(x,ctx->work)); 1991 reset_x = PETSC_TRUE; 1992 } 1993 if (transpose) { 1994 PetscCall(MatMultTranspose(ctx->A,x,y)); 1995 } else { 1996 PetscCall(MatMult(ctx->A,x,y)); 1997 } 1998 if (reset_x) { 1999 PetscCall(VecResetArray(x)); 2000 } 2001 if (apply_left) { 2002 PetscScalar *ay; 2003 PetscInt i; 2004 2005 PetscCall(VecGetArray(y,&ay)); 2006 for (i=0;i<ctx->benign_n;i++) { 2007 PetscScalar sum,val; 2008 const PetscInt *idxs; 2009 PetscInt nz,j; 2010 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 2011 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 2012 val = -ay[idxs[nz-1]]; 2013 if (ctx->apply_p0) { 2014 sum = 0.; 2015 for (j=0;j<nz-1;j++) { 2016 sum += ay[idxs[j]]; 2017 ay[idxs[j]] += val; 2018 } 2019 ay[idxs[nz-1]] += sum; 2020 } else { 2021 for (j=0;j<nz-1;j++) { 2022 ay[idxs[j]] += val; 2023 } 2024 ay[idxs[nz-1]] = 0.; 2025 } 2026 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 2027 } 2028 PetscCall(VecRestoreArray(y,&ay)); 2029 } 2030 PetscFunctionReturn(0); 2031 } 2032 2033 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2034 { 2035 PetscFunctionBegin; 2036 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE)); 2037 PetscFunctionReturn(0); 2038 } 2039 2040 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2041 { 2042 PetscFunctionBegin; 2043 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE)); 2044 PetscFunctionReturn(0); 2045 } 2046 2047 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2048 { 2049 PC_IS *pcis = (PC_IS*)pc->data; 2050 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2051 PCBDDCBenignMatMult_ctx ctx; 2052 2053 PetscFunctionBegin; 2054 if (!restore) { 2055 Mat A_IB,A_BI; 2056 PetscScalar *work; 2057 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2058 2059 PetscCheck(!pcbddc->benign_original_mat,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2060 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2061 PetscCall(PetscMalloc1(pcis->n,&work)); 2062 PetscCall(MatCreate(PETSC_COMM_SELF,&A_IB)); 2063 PetscCall(MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE)); 2064 PetscCall(MatSetType(A_IB,MATSHELL)); 2065 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private)); 2066 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2067 PetscCall(PetscNew(&ctx)); 2068 PetscCall(MatShellSetContext(A_IB,ctx)); 2069 ctx->apply_left = PETSC_TRUE; 2070 ctx->apply_right = PETSC_FALSE; 2071 ctx->apply_p0 = PETSC_FALSE; 2072 ctx->benign_n = pcbddc->benign_n; 2073 if (reuse) { 2074 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2075 ctx->free = PETSC_FALSE; 2076 } else { /* TODO: could be optimized for successive solves */ 2077 ISLocalToGlobalMapping N_to_D; 2078 PetscInt i; 2079 2080 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D)); 2081 PetscCall(PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs)); 2082 for (i=0;i<pcbddc->benign_n;i++) { 2083 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i])); 2084 } 2085 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2086 ctx->free = PETSC_TRUE; 2087 } 2088 ctx->A = pcis->A_IB; 2089 ctx->work = work; 2090 PetscCall(MatSetUp(A_IB)); 2091 PetscCall(MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY)); 2092 PetscCall(MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY)); 2093 pcis->A_IB = A_IB; 2094 2095 /* A_BI as A_IB^T */ 2096 PetscCall(MatCreateTranspose(A_IB,&A_BI)); 2097 pcbddc->benign_original_mat = pcis->A_BI; 2098 pcis->A_BI = A_BI; 2099 } else { 2100 if (!pcbddc->benign_original_mat) { 2101 PetscFunctionReturn(0); 2102 } 2103 PetscCall(MatShellGetContext(pcis->A_IB,&ctx)); 2104 PetscCall(MatDestroy(&pcis->A_IB)); 2105 pcis->A_IB = ctx->A; 2106 ctx->A = NULL; 2107 PetscCall(MatDestroy(&pcis->A_BI)); 2108 pcis->A_BI = pcbddc->benign_original_mat; 2109 pcbddc->benign_original_mat = NULL; 2110 if (ctx->free) { 2111 PetscInt i; 2112 for (i=0;i<ctx->benign_n;i++) { 2113 PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2114 } 2115 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2116 } 2117 PetscCall(PetscFree(ctx->work)); 2118 PetscCall(PetscFree(ctx)); 2119 } 2120 PetscFunctionReturn(0); 2121 } 2122 2123 /* used just in bddc debug mode */ 2124 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2125 { 2126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2128 Mat An; 2129 2130 PetscFunctionBegin; 2131 PetscCall(MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An)); 2132 PetscCall(MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL)); 2133 if (is1) { 2134 PetscCall(MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B)); 2135 PetscCall(MatDestroy(&An)); 2136 } else { 2137 *B = An; 2138 } 2139 PetscFunctionReturn(0); 2140 } 2141 2142 /* TODO: add reuse flag */ 2143 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2144 { 2145 Mat Bt; 2146 PetscScalar *a,*bdata; 2147 const PetscInt *ii,*ij; 2148 PetscInt m,n,i,nnz,*bii,*bij; 2149 PetscBool flg_row; 2150 2151 PetscFunctionBegin; 2152 PetscCall(MatGetSize(A,&n,&m)); 2153 PetscCall(MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2154 PetscCall(MatSeqAIJGetArray(A,&a)); 2155 nnz = n; 2156 for (i=0;i<ii[n];i++) { 2157 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2158 } 2159 PetscCall(PetscMalloc1(n+1,&bii)); 2160 PetscCall(PetscMalloc1(nnz,&bij)); 2161 PetscCall(PetscMalloc1(nnz,&bdata)); 2162 nnz = 0; 2163 bii[0] = 0; 2164 for (i=0;i<n;i++) { 2165 PetscInt j; 2166 for (j=ii[i];j<ii[i+1];j++) { 2167 PetscScalar entry = a[j]; 2168 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2169 bij[nnz] = ij[j]; 2170 bdata[nnz] = entry; 2171 nnz++; 2172 } 2173 } 2174 bii[i+1] = nnz; 2175 } 2176 PetscCall(MatSeqAIJRestoreArray(A,&a)); 2177 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt)); 2178 PetscCall(MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2179 { 2180 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2181 b->free_a = PETSC_TRUE; 2182 b->free_ij = PETSC_TRUE; 2183 } 2184 if (*B == A) { 2185 PetscCall(MatDestroy(&A)); 2186 } 2187 *B = Bt; 2188 PetscFunctionReturn(0); 2189 } 2190 2191 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2192 { 2193 Mat B = NULL; 2194 DM dm; 2195 IS is_dummy,*cc_n; 2196 ISLocalToGlobalMapping l2gmap_dummy; 2197 PCBDDCGraph graph; 2198 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2199 PetscInt i,n; 2200 PetscInt *xadj,*adjncy; 2201 PetscBool isplex = PETSC_FALSE; 2202 2203 PetscFunctionBegin; 2204 if (ncc) *ncc = 0; 2205 if (cc) *cc = NULL; 2206 if (primalv) *primalv = NULL; 2207 PetscCall(PCBDDCGraphCreate(&graph)); 2208 PetscCall(MatGetDM(pc->pmat,&dm)); 2209 if (!dm) { 2210 PetscCall(PCGetDM(pc,&dm)); 2211 } 2212 if (dm) { 2213 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex)); 2214 } 2215 if (filter) isplex = PETSC_FALSE; 2216 2217 if (isplex) { /* this code has been modified from plexpartition.c */ 2218 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2219 PetscInt *adj = NULL; 2220 IS cellNumbering; 2221 const PetscInt *cellNum; 2222 PetscBool useCone, useClosure; 2223 PetscSection section; 2224 PetscSegBuffer adjBuffer; 2225 PetscSF sfPoint; 2226 2227 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2228 PetscCall(DMGetPointSF(dm, &sfPoint)); 2229 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2230 /* Build adjacency graph via a section/segbuffer */ 2231 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion)); 2232 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2233 PetscCall(PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer)); 2234 /* Always use FVM adjacency to create partitioner graph */ 2235 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2236 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2237 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2238 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2239 for (n = 0, p = pStart; p < pEnd; p++) { 2240 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2241 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2242 adjSize = PETSC_DETERMINE; 2243 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2244 for (a = 0; a < adjSize; ++a) { 2245 const PetscInt point = adj[a]; 2246 if (pStart <= point && point < pEnd) { 2247 PetscInt *PETSC_RESTRICT pBuf; 2248 PetscCall(PetscSectionAddDof(section, p, 1)); 2249 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2250 *pBuf = point; 2251 } 2252 } 2253 n++; 2254 } 2255 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2256 /* Derive CSR graph from section/segbuffer */ 2257 PetscCall(PetscSectionSetUp(section)); 2258 PetscCall(PetscSectionGetStorageSize(section, &size)); 2259 PetscCall(PetscMalloc1(n+1, &xadj)); 2260 for (idx = 0, p = pStart; p < pEnd; p++) { 2261 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2262 PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++]))); 2263 } 2264 xadj[n] = size; 2265 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2266 /* Clean up */ 2267 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2268 PetscCall(PetscSectionDestroy(§ion)); 2269 PetscCall(PetscFree(adj)); 2270 graph->xadj = xadj; 2271 graph->adjncy = adjncy; 2272 } else { 2273 Mat A; 2274 PetscBool isseqaij, flg_row; 2275 2276 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2277 if (!A->rmap->N || !A->cmap->N) { 2278 PetscCall(PCBDDCGraphDestroy(&graph)); 2279 PetscFunctionReturn(0); 2280 } 2281 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij)); 2282 if (!isseqaij && filter) { 2283 PetscBool isseqdense; 2284 2285 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense)); 2286 if (!isseqdense) { 2287 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 2288 } else { /* TODO: rectangular case and LDA */ 2289 PetscScalar *array; 2290 PetscReal chop=1.e-6; 2291 2292 PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 2293 PetscCall(MatDenseGetArray(B,&array)); 2294 PetscCall(MatGetSize(B,&n,NULL)); 2295 for (i=0;i<n;i++) { 2296 PetscInt j; 2297 for (j=i+1;j<n;j++) { 2298 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2299 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2300 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2301 } 2302 } 2303 PetscCall(MatDenseRestoreArray(B,&array)); 2304 PetscCall(MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B)); 2305 } 2306 } else { 2307 PetscCall(PetscObjectReference((PetscObject)A)); 2308 B = A; 2309 } 2310 PetscCall(MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2311 2312 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2313 if (filter) { 2314 PetscScalar *data; 2315 PetscInt j,cum; 2316 2317 PetscCall(PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered)); 2318 PetscCall(MatSeqAIJGetArray(B,&data)); 2319 cum = 0; 2320 for (i=0;i<n;i++) { 2321 PetscInt t; 2322 2323 for (j=xadj[i];j<xadj[i+1];j++) { 2324 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2325 continue; 2326 } 2327 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2328 } 2329 t = xadj_filtered[i]; 2330 xadj_filtered[i] = cum; 2331 cum += t; 2332 } 2333 PetscCall(MatSeqAIJRestoreArray(B,&data)); 2334 graph->xadj = xadj_filtered; 2335 graph->adjncy = adjncy_filtered; 2336 } else { 2337 graph->xadj = xadj; 2338 graph->adjncy = adjncy; 2339 } 2340 } 2341 /* compute local connected components using PCBDDCGraph */ 2342 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy)); 2343 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy)); 2344 PetscCall(ISDestroy(&is_dummy)); 2345 PetscCall(PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT)); 2346 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2347 PetscCall(PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL)); 2348 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2349 2350 /* partial clean up */ 2351 PetscCall(PetscFree2(xadj_filtered,adjncy_filtered)); 2352 if (B) { 2353 PetscBool flg_row; 2354 PetscCall(MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2355 PetscCall(MatDestroy(&B)); 2356 } 2357 if (isplex) { 2358 PetscCall(PetscFree(xadj)); 2359 PetscCall(PetscFree(adjncy)); 2360 } 2361 2362 /* get back data */ 2363 if (isplex) { 2364 if (ncc) *ncc = graph->ncc; 2365 if (cc || primalv) { 2366 Mat A; 2367 PetscBT btv,btvt; 2368 PetscSection subSection; 2369 PetscInt *ids,cum,cump,*cids,*pids; 2370 2371 PetscCall(DMPlexGetSubdomainSection(dm,&subSection)); 2372 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2373 PetscCall(PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids)); 2374 PetscCall(PetscBTCreate(A->rmap->n,&btv)); 2375 PetscCall(PetscBTCreate(A->rmap->n,&btvt)); 2376 2377 cids[0] = 0; 2378 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2379 PetscInt j; 2380 2381 PetscCall(PetscBTMemzero(A->rmap->n,btvt)); 2382 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2383 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2384 2385 PetscCall(DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2386 for (k = 0; k < 2*size; k += 2) { 2387 PetscInt s, pp, p = closure[k], off, dof, cdof; 2388 2389 PetscCall(PetscSectionGetConstraintDof(subSection,p,&cdof)); 2390 PetscCall(PetscSectionGetOffset(subSection,p,&off)); 2391 PetscCall(PetscSectionGetDof(subSection,p,&dof)); 2392 for (s = 0; s < dof-cdof; s++) { 2393 if (PetscBTLookupSet(btvt,off+s)) continue; 2394 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2395 else pids[cump++] = off+s; /* cross-vertex */ 2396 } 2397 PetscCall(DMPlexGetTreeParent(dm,p,&pp,NULL)); 2398 if (pp != p) { 2399 PetscCall(PetscSectionGetConstraintDof(subSection,pp,&cdof)); 2400 PetscCall(PetscSectionGetOffset(subSection,pp,&off)); 2401 PetscCall(PetscSectionGetDof(subSection,pp,&dof)); 2402 for (s = 0; s < dof-cdof; s++) { 2403 if (PetscBTLookupSet(btvt,off+s)) continue; 2404 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2405 else pids[cump++] = off+s; /* cross-vertex */ 2406 } 2407 } 2408 } 2409 PetscCall(DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2410 } 2411 cids[i+1] = cum; 2412 /* mark dofs as already assigned */ 2413 for (j = cids[i]; j < cids[i+1]; j++) { 2414 PetscCall(PetscBTSet(btv,ids[j])); 2415 } 2416 } 2417 if (cc) { 2418 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2419 for (i = 0; i < graph->ncc; i++) { 2420 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i])); 2421 } 2422 *cc = cc_n; 2423 } 2424 if (primalv) { 2425 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv)); 2426 } 2427 PetscCall(PetscFree3(ids,cids,pids)); 2428 PetscCall(PetscBTDestroy(&btv)); 2429 PetscCall(PetscBTDestroy(&btvt)); 2430 } 2431 } else { 2432 if (ncc) *ncc = graph->ncc; 2433 if (cc) { 2434 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2435 for (i=0;i<graph->ncc;i++) { 2436 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i])); 2437 } 2438 *cc = cc_n; 2439 } 2440 } 2441 /* clean up graph */ 2442 graph->xadj = NULL; 2443 graph->adjncy = NULL; 2444 PetscCall(PCBDDCGraphDestroy(&graph)); 2445 PetscFunctionReturn(0); 2446 } 2447 2448 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2449 { 2450 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2451 PC_IS* pcis = (PC_IS*)(pc->data); 2452 IS dirIS = NULL; 2453 PetscInt i; 2454 2455 PetscFunctionBegin; 2456 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS)); 2457 if (zerodiag) { 2458 Mat A; 2459 Vec vec3_N; 2460 PetscScalar *vals; 2461 const PetscInt *idxs; 2462 PetscInt nz,*count; 2463 2464 /* p0 */ 2465 PetscCall(VecSet(pcis->vec1_N,0.)); 2466 PetscCall(PetscMalloc1(pcis->n,&vals)); 2467 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2468 PetscCall(ISGetIndices(zerodiag,&idxs)); 2469 for (i=0;i<nz;i++) vals[i] = 1.; 2470 PetscCall(VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES)); 2471 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2472 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2473 /* v_I */ 2474 PetscCall(VecSetRandom(pcis->vec2_N,NULL)); 2475 for (i=0;i<nz;i++) vals[i] = 0.; 2476 PetscCall(VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES)); 2477 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2478 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2479 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2480 PetscCall(VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES)); 2481 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2482 if (dirIS) { 2483 PetscInt n; 2484 2485 PetscCall(ISGetLocalSize(dirIS,&n)); 2486 PetscCall(ISGetIndices(dirIS,&idxs)); 2487 for (i=0;i<n;i++) vals[i] = 0.; 2488 PetscCall(VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES)); 2489 PetscCall(ISRestoreIndices(dirIS,&idxs)); 2490 } 2491 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2492 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2493 PetscCall(VecDuplicate(pcis->vec1_N,&vec3_N)); 2494 PetscCall(VecSet(vec3_N,0.)); 2495 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2496 PetscCall(MatMult(A,pcis->vec1_N,vec3_N)); 2497 PetscCall(VecDot(vec3_N,pcis->vec2_N,&vals[0])); 2498 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",(double)PetscAbsScalar(vals[0])); 2499 PetscCall(PetscFree(vals)); 2500 PetscCall(VecDestroy(&vec3_N)); 2501 2502 /* there should not be any pressure dofs lying on the interface */ 2503 PetscCall(PetscCalloc1(pcis->n,&count)); 2504 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2505 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2506 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2507 PetscCall(ISGetIndices(zerodiag,&idxs)); 2508 for (i=0;i<nz;i++) PetscCheck(!count[idxs[i]],PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2509 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2510 PetscCall(PetscFree(count)); 2511 } 2512 PetscCall(ISDestroy(&dirIS)); 2513 2514 /* check PCBDDCBenignGetOrSetP0 */ 2515 PetscCall(VecSetRandom(pcis->vec1_global,NULL)); 2516 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2517 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE)); 2518 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2519 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE)); 2520 for (i=0;i<pcbddc->benign_n;i++) { 2521 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2522 PetscCheck(val == -PetscGlobalRank-i,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",(double)PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2523 } 2524 PetscFunctionReturn(0); 2525 } 2526 2527 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2528 { 2529 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2530 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2531 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2532 PetscInt nz,n,benign_n,bsp = 1; 2533 PetscInt *interior_dofs,n_interior_dofs,nneu; 2534 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2535 PetscErrorCode ierr; 2536 2537 PetscFunctionBegin; 2538 if (reuse) goto project_b0; 2539 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2540 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2541 for (n=0;n<pcbddc->benign_n;n++) { 2542 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2543 } 2544 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2545 has_null_pressures = PETSC_TRUE; 2546 have_null = PETSC_TRUE; 2547 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2548 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2549 Checks if all the pressure dofs in each subdomain have a zero diagonal 2550 If not, a change of basis on pressures is not needed 2551 since the local Schur complements are already SPD 2552 */ 2553 if (pcbddc->n_ISForDofsLocal) { 2554 IS iP = NULL; 2555 PetscInt p,*pp; 2556 PetscBool flg; 2557 2558 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp)); 2559 n = pcbddc->n_ISForDofsLocal; 2560 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");PetscCall(ierr); 2561 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg)); 2562 ierr = PetscOptionsEnd();PetscCall(ierr); 2563 if (!flg) { 2564 n = 1; 2565 pp[0] = pcbddc->n_ISForDofsLocal-1; 2566 } 2567 2568 bsp = 0; 2569 for (p=0;p<n;p++) { 2570 PetscInt bs; 2571 2572 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal,PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]); 2573 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2574 bsp += bs; 2575 } 2576 PetscCall(PetscMalloc1(bsp,&bzerodiag)); 2577 bsp = 0; 2578 for (p=0;p<n;p++) { 2579 const PetscInt *idxs; 2580 PetscInt b,bs,npl,*bidxs; 2581 2582 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2583 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl)); 2584 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2585 PetscCall(PetscMalloc1(npl/bs,&bidxs)); 2586 for (b=0;b<bs;b++) { 2587 PetscInt i; 2588 2589 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2590 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp])); 2591 bsp++; 2592 } 2593 PetscCall(PetscFree(bidxs)); 2594 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2595 } 2596 PetscCall(ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures)); 2597 2598 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2599 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP)); 2600 if (iP) { 2601 IS newpressures; 2602 2603 PetscCall(ISDifference(pressures,iP,&newpressures)); 2604 PetscCall(ISDestroy(&pressures)); 2605 pressures = newpressures; 2606 } 2607 PetscCall(ISSorted(pressures,&sorted)); 2608 if (!sorted) { 2609 PetscCall(ISSort(pressures)); 2610 } 2611 PetscCall(PetscFree(pp)); 2612 } 2613 2614 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2615 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2616 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2617 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag)); 2618 PetscCall(ISSorted(zerodiag,&sorted)); 2619 if (!sorted) { 2620 PetscCall(ISSort(zerodiag)); 2621 } 2622 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2623 zerodiag_save = zerodiag; 2624 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2625 if (!nz) { 2626 if (n) have_null = PETSC_FALSE; 2627 has_null_pressures = PETSC_FALSE; 2628 PetscCall(ISDestroy(&zerodiag)); 2629 } 2630 recompute_zerodiag = PETSC_FALSE; 2631 2632 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2633 zerodiag_subs = NULL; 2634 benign_n = 0; 2635 n_interior_dofs = 0; 2636 interior_dofs = NULL; 2637 nneu = 0; 2638 if (pcbddc->NeumannBoundariesLocal) { 2639 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu)); 2640 } 2641 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2642 if (checkb) { /* need to compute interior nodes */ 2643 PetscInt n,i,j; 2644 PetscInt n_neigh,*neigh,*n_shared,**shared; 2645 PetscInt *iwork; 2646 2647 PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping,&n)); 2648 PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2649 PetscCall(PetscCalloc1(n,&iwork)); 2650 PetscCall(PetscMalloc1(n,&interior_dofs)); 2651 for (i=1;i<n_neigh;i++) 2652 for (j=0;j<n_shared[i];j++) 2653 iwork[shared[i][j]] += 1; 2654 for (i=0;i<n;i++) 2655 if (!iwork[i]) 2656 interior_dofs[n_interior_dofs++] = i; 2657 PetscCall(PetscFree(iwork)); 2658 PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2659 } 2660 if (has_null_pressures) { 2661 IS *subs; 2662 PetscInt nsubs,i,j,nl; 2663 const PetscInt *idxs; 2664 PetscScalar *array; 2665 Vec *work; 2666 2667 subs = pcbddc->local_subs; 2668 nsubs = pcbddc->n_local_subs; 2669 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2670 if (checkb) { 2671 PetscCall(VecDuplicateVecs(matis->y,2,&work)); 2672 PetscCall(ISGetLocalSize(zerodiag,&nl)); 2673 PetscCall(ISGetIndices(zerodiag,&idxs)); 2674 /* work[0] = 1_p */ 2675 PetscCall(VecSet(work[0],0.)); 2676 PetscCall(VecGetArray(work[0],&array)); 2677 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2678 PetscCall(VecRestoreArray(work[0],&array)); 2679 /* work[0] = 1_v */ 2680 PetscCall(VecSet(work[1],1.)); 2681 PetscCall(VecGetArray(work[1],&array)); 2682 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2683 PetscCall(VecRestoreArray(work[1],&array)); 2684 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2685 } 2686 2687 if (nsubs > 1 || bsp > 1) { 2688 IS *is; 2689 PetscInt b,totb; 2690 2691 totb = bsp; 2692 is = bsp > 1 ? bzerodiag : &zerodiag; 2693 nsubs = PetscMax(nsubs,1); 2694 PetscCall(PetscCalloc1(nsubs*totb,&zerodiag_subs)); 2695 for (b=0;b<totb;b++) { 2696 for (i=0;i<nsubs;i++) { 2697 ISLocalToGlobalMapping l2g; 2698 IS t_zerodiag_subs; 2699 PetscInt nl; 2700 2701 if (subs) { 2702 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i],&l2g)); 2703 } else { 2704 IS tis; 2705 2706 PetscCall(MatGetLocalSize(pcbddc->local_mat,&nl,NULL)); 2707 PetscCall(ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis)); 2708 PetscCall(ISLocalToGlobalMappingCreateIS(tis,&l2g)); 2709 PetscCall(ISDestroy(&tis)); 2710 } 2711 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs)); 2712 PetscCall(ISGetLocalSize(t_zerodiag_subs,&nl)); 2713 if (nl) { 2714 PetscBool valid = PETSC_TRUE; 2715 2716 if (checkb) { 2717 PetscCall(VecSet(matis->x,0)); 2718 PetscCall(ISGetLocalSize(subs[i],&nl)); 2719 PetscCall(ISGetIndices(subs[i],&idxs)); 2720 PetscCall(VecGetArray(matis->x,&array)); 2721 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2722 PetscCall(VecRestoreArray(matis->x,&array)); 2723 PetscCall(ISRestoreIndices(subs[i],&idxs)); 2724 PetscCall(VecPointwiseMult(matis->x,work[0],matis->x)); 2725 PetscCall(MatMult(matis->A,matis->x,matis->y)); 2726 PetscCall(VecPointwiseMult(matis->y,work[1],matis->y)); 2727 PetscCall(VecGetArray(matis->y,&array)); 2728 for (j=0;j<n_interior_dofs;j++) { 2729 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2730 valid = PETSC_FALSE; 2731 break; 2732 } 2733 } 2734 PetscCall(VecRestoreArray(matis->y,&array)); 2735 } 2736 if (valid && nneu) { 2737 const PetscInt *idxs; 2738 PetscInt nzb; 2739 2740 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2741 PetscCall(ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL)); 2742 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2743 if (nzb) valid = PETSC_FALSE; 2744 } 2745 if (valid && pressures) { 2746 IS t_pressure_subs,tmp; 2747 PetscInt i1,i2; 2748 2749 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs)); 2750 PetscCall(ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp)); 2751 PetscCall(ISGetLocalSize(tmp,&i1)); 2752 PetscCall(ISGetLocalSize(t_zerodiag_subs,&i2)); 2753 if (i2 != i1) valid = PETSC_FALSE; 2754 PetscCall(ISDestroy(&t_pressure_subs)); 2755 PetscCall(ISDestroy(&tmp)); 2756 } 2757 if (valid) { 2758 PetscCall(ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n])); 2759 benign_n++; 2760 } else recompute_zerodiag = PETSC_TRUE; 2761 } 2762 PetscCall(ISDestroy(&t_zerodiag_subs)); 2763 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2764 } 2765 } 2766 } else { /* there's just one subdomain (or zero if they have not been detected */ 2767 PetscBool valid = PETSC_TRUE; 2768 2769 if (nneu) valid = PETSC_FALSE; 2770 if (valid && pressures) { 2771 PetscCall(ISEqual(pressures,zerodiag,&valid)); 2772 } 2773 if (valid && checkb) { 2774 PetscCall(MatMult(matis->A,work[0],matis->x)); 2775 PetscCall(VecPointwiseMult(matis->x,work[1],matis->x)); 2776 PetscCall(VecGetArray(matis->x,&array)); 2777 for (j=0;j<n_interior_dofs;j++) { 2778 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2779 valid = PETSC_FALSE; 2780 break; 2781 } 2782 } 2783 PetscCall(VecRestoreArray(matis->x,&array)); 2784 } 2785 if (valid) { 2786 benign_n = 1; 2787 PetscCall(PetscMalloc1(benign_n,&zerodiag_subs)); 2788 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2789 zerodiag_subs[0] = zerodiag; 2790 } 2791 } 2792 if (checkb) { 2793 PetscCall(VecDestroyVecs(2,&work)); 2794 } 2795 } 2796 PetscCall(PetscFree(interior_dofs)); 2797 2798 if (!benign_n) { 2799 PetscInt n; 2800 2801 PetscCall(ISDestroy(&zerodiag)); 2802 recompute_zerodiag = PETSC_FALSE; 2803 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2804 if (n) have_null = PETSC_FALSE; 2805 } 2806 2807 /* final check for null pressures */ 2808 if (zerodiag && pressures) { 2809 PetscCall(ISEqual(pressures,zerodiag,&have_null)); 2810 } 2811 2812 if (recompute_zerodiag) { 2813 PetscCall(ISDestroy(&zerodiag)); 2814 if (benign_n == 1) { 2815 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2816 zerodiag = zerodiag_subs[0]; 2817 } else { 2818 PetscInt i,nzn,*new_idxs; 2819 2820 nzn = 0; 2821 for (i=0;i<benign_n;i++) { 2822 PetscInt ns; 2823 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2824 nzn += ns; 2825 } 2826 PetscCall(PetscMalloc1(nzn,&new_idxs)); 2827 nzn = 0; 2828 for (i=0;i<benign_n;i++) { 2829 PetscInt ns,*idxs; 2830 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2831 PetscCall(ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2832 PetscCall(PetscArraycpy(new_idxs+nzn,idxs,ns)); 2833 PetscCall(ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2834 nzn += ns; 2835 } 2836 PetscCall(PetscSortInt(nzn,new_idxs)); 2837 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag)); 2838 } 2839 have_null = PETSC_FALSE; 2840 } 2841 2842 /* determines if the coarse solver will be singular or not */ 2843 PetscCall(MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 2844 2845 /* Prepare matrix to compute no-net-flux */ 2846 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2847 Mat A,loc_divudotp; 2848 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2849 IS row,col,isused = NULL; 2850 PetscInt M,N,n,st,n_isused; 2851 2852 if (pressures) { 2853 isused = pressures; 2854 } else { 2855 isused = zerodiag_save; 2856 } 2857 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL)); 2858 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2859 PetscCall(MatGetLocalSize(A,&n,NULL)); 2860 PetscCheck(isused || (n == 0),PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2861 n_isused = 0; 2862 if (isused) { 2863 PetscCall(ISGetLocalSize(isused,&n_isused)); 2864 } 2865 PetscCallMPI(MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 2866 st = st-n_isused; 2867 if (n) { 2868 const PetscInt *gidxs; 2869 2870 PetscCall(MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp)); 2871 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 2872 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2873 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2874 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col)); 2875 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 2876 } else { 2877 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp)); 2878 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2879 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col)); 2880 } 2881 PetscCall(MatGetSize(pc->pmat,NULL,&N)); 2882 PetscCall(ISGetSize(row,&M)); 2883 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 2884 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 2885 PetscCall(ISDestroy(&row)); 2886 PetscCall(ISDestroy(&col)); 2887 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp)); 2888 PetscCall(MatSetType(pcbddc->divudotp,MATIS)); 2889 PetscCall(MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N)); 2890 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g)); 2891 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2892 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2893 PetscCall(MatISSetLocalMat(pcbddc->divudotp,loc_divudotp)); 2894 PetscCall(MatDestroy(&loc_divudotp)); 2895 PetscCall(MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2896 PetscCall(MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2897 } 2898 PetscCall(ISDestroy(&zerodiag_save)); 2899 PetscCall(ISDestroy(&pressures)); 2900 if (bzerodiag) { 2901 PetscInt i; 2902 2903 for (i=0;i<bsp;i++) { 2904 PetscCall(ISDestroy(&bzerodiag[i])); 2905 } 2906 PetscCall(PetscFree(bzerodiag)); 2907 } 2908 pcbddc->benign_n = benign_n; 2909 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2910 2911 /* determines if the problem has subdomains with 0 pressure block */ 2912 have_null = (PetscBool)(!!pcbddc->benign_n); 2913 PetscCall(MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 2914 2915 project_b0: 2916 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2917 /* change of basis and p0 dofs */ 2918 if (pcbddc->benign_n) { 2919 PetscInt i,s,*nnz; 2920 2921 /* local change of basis for pressures */ 2922 PetscCall(MatDestroy(&pcbddc->benign_change)); 2923 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change)); 2924 PetscCall(MatSetType(pcbddc->benign_change,MATAIJ)); 2925 PetscCall(MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE)); 2926 PetscCall(PetscMalloc1(n,&nnz)); 2927 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2928 for (i=0;i<pcbddc->benign_n;i++) { 2929 const PetscInt *idxs; 2930 PetscInt nzs,j; 2931 2932 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs)); 2933 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2934 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2935 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2936 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2937 } 2938 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz)); 2939 PetscCall(MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 2940 PetscCall(PetscFree(nnz)); 2941 /* set identity by default */ 2942 for (i=0;i<n;i++) { 2943 PetscCall(MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES)); 2944 } 2945 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 2946 PetscCall(PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0)); 2947 /* set change on pressures */ 2948 for (s=0;s<pcbddc->benign_n;s++) { 2949 PetscScalar *array; 2950 const PetscInt *idxs; 2951 PetscInt nzs; 2952 2953 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs)); 2954 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2955 for (i=0;i<nzs-1;i++) { 2956 PetscScalar vals[2]; 2957 PetscInt cols[2]; 2958 2959 cols[0] = idxs[i]; 2960 cols[1] = idxs[nzs-1]; 2961 vals[0] = 1.; 2962 vals[1] = 1.; 2963 PetscCall(MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES)); 2964 } 2965 PetscCall(PetscMalloc1(nzs,&array)); 2966 for (i=0;i<nzs-1;i++) array[i] = -1.; 2967 array[nzs-1] = 1.; 2968 PetscCall(MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES)); 2969 /* store local idxs for p0 */ 2970 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2971 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2972 PetscCall(PetscFree(array)); 2973 } 2974 PetscCall(MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2975 PetscCall(MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2976 2977 /* project if needed */ 2978 if (pcbddc->benign_change_explicit) { 2979 Mat M; 2980 2981 PetscCall(MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M)); 2982 PetscCall(MatDestroy(&pcbddc->local_mat)); 2983 PetscCall(MatSeqAIJCompress(M,&pcbddc->local_mat)); 2984 PetscCall(MatDestroy(&M)); 2985 } 2986 /* store global idxs for p0 */ 2987 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx)); 2988 } 2989 *zerodiaglocal = zerodiag; 2990 PetscFunctionReturn(0); 2991 } 2992 2993 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2994 { 2995 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2996 PetscScalar *array; 2997 2998 PetscFunctionBegin; 2999 if (!pcbddc->benign_sf) { 3000 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf)); 3001 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx)); 3002 } 3003 if (get) { 3004 PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array)); 3005 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 3006 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 3007 PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 3008 } else { 3009 PetscCall(VecGetArray(v,&array)); 3010 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 3011 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 3012 PetscCall(VecRestoreArray(v,&array)); 3013 } 3014 PetscFunctionReturn(0); 3015 } 3016 3017 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3018 { 3019 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3020 3021 PetscFunctionBegin; 3022 /* TODO: add error checking 3023 - avoid nested pop (or push) calls. 3024 - cannot push before pop. 3025 - cannot call this if pcbddc->local_mat is NULL 3026 */ 3027 if (!pcbddc->benign_n) { 3028 PetscFunctionReturn(0); 3029 } 3030 if (pop) { 3031 if (pcbddc->benign_change_explicit) { 3032 IS is_p0; 3033 MatReuse reuse; 3034 3035 /* extract B_0 */ 3036 reuse = MAT_INITIAL_MATRIX; 3037 if (pcbddc->benign_B0) { 3038 reuse = MAT_REUSE_MATRIX; 3039 } 3040 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0)); 3041 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0)); 3042 /* remove rows and cols from local problem */ 3043 PetscCall(MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE)); 3044 PetscCall(MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 3045 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL)); 3046 PetscCall(ISDestroy(&is_p0)); 3047 } else { 3048 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3049 PetscScalar *vals; 3050 PetscInt i,n,*idxs_ins; 3051 3052 PetscCall(VecGetLocalSize(matis->y,&n)); 3053 PetscCall(PetscMalloc2(n,&idxs_ins,n,&vals)); 3054 if (!pcbddc->benign_B0) { 3055 PetscInt *nnz; 3056 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0)); 3057 PetscCall(MatSetType(pcbddc->benign_B0,MATAIJ)); 3058 PetscCall(MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE)); 3059 PetscCall(PetscMalloc1(pcbddc->benign_n,&nnz)); 3060 for (i=0;i<pcbddc->benign_n;i++) { 3061 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i])); 3062 nnz[i] = n - nnz[i]; 3063 } 3064 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz)); 3065 PetscCall(MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 3066 PetscCall(PetscFree(nnz)); 3067 } 3068 3069 for (i=0;i<pcbddc->benign_n;i++) { 3070 PetscScalar *array; 3071 PetscInt *idxs,j,nz,cum; 3072 3073 PetscCall(VecSet(matis->x,0.)); 3074 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz)); 3075 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3076 for (j=0;j<nz;j++) vals[j] = 1.; 3077 PetscCall(VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES)); 3078 PetscCall(VecAssemblyBegin(matis->x)); 3079 PetscCall(VecAssemblyEnd(matis->x)); 3080 PetscCall(VecSet(matis->y,0.)); 3081 PetscCall(MatMult(matis->A,matis->x,matis->y)); 3082 PetscCall(VecGetArray(matis->y,&array)); 3083 cum = 0; 3084 for (j=0;j<n;j++) { 3085 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3086 vals[cum] = array[j]; 3087 idxs_ins[cum] = j; 3088 cum++; 3089 } 3090 } 3091 PetscCall(MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES)); 3092 PetscCall(VecRestoreArray(matis->y,&array)); 3093 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3094 } 3095 PetscCall(MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3096 PetscCall(MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3097 PetscCall(PetscFree2(idxs_ins,vals)); 3098 } 3099 } else { /* push */ 3100 if (pcbddc->benign_change_explicit) { 3101 PetscInt i; 3102 3103 for (i=0;i<pcbddc->benign_n;i++) { 3104 PetscScalar *B0_vals; 3105 PetscInt *B0_cols,B0_ncol; 3106 3107 PetscCall(MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3108 PetscCall(MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES)); 3109 PetscCall(MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES)); 3110 PetscCall(MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES)); 3111 PetscCall(MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3112 } 3113 PetscCall(MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3114 PetscCall(MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3115 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3116 } 3117 PetscFunctionReturn(0); 3118 } 3119 3120 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3121 { 3122 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3123 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3124 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3125 PetscBLASInt *B_iwork,*B_ifail; 3126 PetscScalar *work,lwork; 3127 PetscScalar *St,*S,*eigv; 3128 PetscScalar *Sarray,*Starray; 3129 PetscReal *eigs,thresh,lthresh,uthresh; 3130 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3131 PetscBool allocated_S_St; 3132 #if defined(PETSC_USE_COMPLEX) 3133 PetscReal *rwork; 3134 #endif 3135 PetscErrorCode ierr; 3136 3137 PetscFunctionBegin; 3138 PetscCheck(sub_schurs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3139 PetscCheck(sub_schurs->schur_explicit,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3140 PetscCheckFalse(sub_schurs->n_subs && (!sub_schurs->is_symmetric),PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3141 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3142 3143 if (pcbddc->dbg_flag) { 3144 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3145 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 3146 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n")); 3147 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3148 } 3149 3150 if (pcbddc->dbg_flag) { 3151 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef)); 3152 } 3153 3154 /* max size of subsets */ 3155 mss = 0; 3156 for (i=0;i<sub_schurs->n_subs;i++) { 3157 PetscInt subset_size; 3158 3159 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3160 mss = PetscMax(mss,subset_size); 3161 } 3162 3163 /* min/max and threshold */ 3164 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3165 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3166 nmax = PetscMax(nmin,nmax); 3167 allocated_S_St = PETSC_FALSE; 3168 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3169 allocated_S_St = PETSC_TRUE; 3170 } 3171 3172 /* allocate lapack workspace */ 3173 cum = cum2 = 0; 3174 maxneigs = 0; 3175 for (i=0;i<sub_schurs->n_subs;i++) { 3176 PetscInt n,subset_size; 3177 3178 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3179 n = PetscMin(subset_size,nmax); 3180 cum += subset_size; 3181 cum2 += subset_size*n; 3182 maxneigs = PetscMax(maxneigs,n); 3183 } 3184 lwork = 0; 3185 if (mss) { 3186 if (sub_schurs->is_symmetric) { 3187 PetscScalar sdummy = 0.; 3188 PetscBLASInt B_itype = 1; 3189 PetscBLASInt B_N = mss, idummy = 0; 3190 PetscReal rdummy = 0.,zero = 0.0; 3191 PetscReal eps = 0.0; /* dlamch? */ 3192 3193 B_lwork = -1; 3194 /* some implementations may complain about NULL pointers, even if we are querying */ 3195 S = &sdummy; 3196 St = &sdummy; 3197 eigs = &rdummy; 3198 eigv = &sdummy; 3199 B_iwork = &idummy; 3200 B_ifail = &idummy; 3201 #if defined(PETSC_USE_COMPLEX) 3202 rwork = &rdummy; 3203 #endif 3204 thresh = 1.0; 3205 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3206 #if defined(PETSC_USE_COMPLEX) 3207 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3208 #else 3209 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3210 #endif 3211 PetscCheckFalse(B_ierr != 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3212 PetscCall(PetscFPTrapPop()); 3213 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3214 } 3215 3216 nv = 0; 3217 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3218 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&nv)); 3219 } 3220 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork)); 3221 if (allocated_S_St) { 3222 PetscCall(PetscMalloc2(mss*mss,&S,mss*mss,&St)); 3223 } 3224 PetscCall(PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail)); 3225 #if defined(PETSC_USE_COMPLEX) 3226 PetscCall(PetscMalloc1(7*mss,&rwork)); 3227 #endif 3228 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3229 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3230 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3231 nv+cum,&pcbddc->adaptive_constraints_idxs, 3232 nv+cum2,&pcbddc->adaptive_constraints_data);PetscCall(ierr); 3233 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs)); 3234 3235 maxneigs = 0; 3236 cum = cumarray = 0; 3237 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3238 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3239 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3240 const PetscInt *idxs; 3241 3242 PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs)); 3243 for (cum=0;cum<nv;cum++) { 3244 pcbddc->adaptive_constraints_n[cum] = 1; 3245 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3246 pcbddc->adaptive_constraints_data[cum] = 1.0; 3247 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3248 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3249 } 3250 PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs)); 3251 } 3252 3253 if (mss) { /* multilevel */ 3254 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3255 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3256 } 3257 3258 lthresh = pcbddc->adaptive_threshold[0]; 3259 uthresh = pcbddc->adaptive_threshold[1]; 3260 for (i=0;i<sub_schurs->n_subs;i++) { 3261 const PetscInt *idxs; 3262 PetscReal upper,lower; 3263 PetscInt j,subset_size,eigs_start = 0; 3264 PetscBLASInt B_N; 3265 PetscBool same_data = PETSC_FALSE; 3266 PetscBool scal = PETSC_FALSE; 3267 3268 if (pcbddc->use_deluxe_scaling) { 3269 upper = PETSC_MAX_REAL; 3270 lower = uthresh; 3271 } else { 3272 PetscCheck(sub_schurs->is_posdef,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3273 upper = 1./uthresh; 3274 lower = 0.; 3275 } 3276 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3277 PetscCall(ISGetIndices(sub_schurs->is_subs[i],&idxs)); 3278 PetscCall(PetscBLASIntCast(subset_size,&B_N)); 3279 /* this is experimental: we assume the dofs have been properly grouped to have 3280 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3281 if (!sub_schurs->is_posdef) { 3282 Mat T; 3283 3284 for (j=0;j<subset_size;j++) { 3285 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3286 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T)); 3287 PetscCall(MatScale(T,-1.0)); 3288 PetscCall(MatDestroy(&T)); 3289 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T)); 3290 PetscCall(MatScale(T,-1.0)); 3291 PetscCall(MatDestroy(&T)); 3292 if (sub_schurs->change_primal_sub) { 3293 PetscInt nz,k; 3294 const PetscInt *idxs; 3295 3296 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz)); 3297 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i],&idxs)); 3298 for (k=0;k<nz;k++) { 3299 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3300 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3301 } 3302 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs)); 3303 } 3304 scal = PETSC_TRUE; 3305 break; 3306 } 3307 } 3308 } 3309 3310 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3311 if (sub_schurs->is_symmetric) { 3312 PetscInt j,k; 3313 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3314 PetscCall(PetscArrayzero(S,subset_size*subset_size)); 3315 PetscCall(PetscArrayzero(St,subset_size*subset_size)); 3316 } 3317 for (j=0;j<subset_size;j++) { 3318 for (k=j;k<subset_size;k++) { 3319 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3320 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3321 } 3322 } 3323 } else { 3324 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3325 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3326 } 3327 } else { 3328 S = Sarray + cumarray; 3329 St = Starray + cumarray; 3330 } 3331 /* see if we can save some work */ 3332 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3333 PetscCall(PetscArraycmp(S,St,subset_size*subset_size,&same_data)); 3334 } 3335 3336 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3337 B_neigs = 0; 3338 } else { 3339 if (sub_schurs->is_symmetric) { 3340 PetscBLASInt B_itype = 1; 3341 PetscBLASInt B_IL, B_IU; 3342 PetscReal eps = -1.0; /* dlamch? */ 3343 PetscInt nmin_s; 3344 PetscBool compute_range; 3345 3346 B_neigs = 0; 3347 compute_range = (PetscBool)!same_data; 3348 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3349 3350 if (pcbddc->dbg_flag) { 3351 PetscInt nc = 0; 3352 3353 if (sub_schurs->change_primal_sub) { 3354 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc)); 3355 } 3356 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc)); 3357 } 3358 3359 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3360 if (compute_range) { 3361 3362 /* ask for eigenvalues larger than thresh */ 3363 if (sub_schurs->is_posdef) { 3364 #if defined(PETSC_USE_COMPLEX) 3365 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3366 #else 3367 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3368 #endif 3369 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3370 } else { /* no theory so far, but it works nicely */ 3371 PetscInt recipe = 0,recipe_m = 1; 3372 PetscReal bb[2]; 3373 3374 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL)); 3375 switch (recipe) { 3376 case 0: 3377 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3378 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3379 #if defined(PETSC_USE_COMPLEX) 3380 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3381 #else 3382 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3383 #endif 3384 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3385 break; 3386 case 1: 3387 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3388 #if defined(PETSC_USE_COMPLEX) 3389 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3390 #else 3391 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3392 #endif 3393 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3394 if (!scal) { 3395 PetscBLASInt B_neigs2 = 0; 3396 3397 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3398 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3399 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3400 #if defined(PETSC_USE_COMPLEX) 3401 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3402 #else 3403 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3404 #endif 3405 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3406 B_neigs += B_neigs2; 3407 } 3408 break; 3409 case 2: 3410 if (scal) { 3411 bb[0] = PETSC_MIN_REAL; 3412 bb[1] = 0; 3413 #if defined(PETSC_USE_COMPLEX) 3414 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3415 #else 3416 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3417 #endif 3418 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3419 } else { 3420 PetscBLASInt B_neigs2 = 0; 3421 PetscBool import = PETSC_FALSE; 3422 3423 lthresh = PetscMax(lthresh,0.0); 3424 if (lthresh > 0.0) { 3425 bb[0] = PETSC_MIN_REAL; 3426 bb[1] = lthresh*lthresh; 3427 3428 import = PETSC_TRUE; 3429 #if defined(PETSC_USE_COMPLEX) 3430 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3431 #else 3432 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3433 #endif 3434 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3435 } 3436 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3437 bb[1] = PETSC_MAX_REAL; 3438 if (import) { 3439 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3440 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3441 } 3442 #if defined(PETSC_USE_COMPLEX) 3443 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3444 #else 3445 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3446 #endif 3447 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3448 B_neigs += B_neigs2; 3449 } 3450 break; 3451 case 3: 3452 if (scal) { 3453 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL)); 3454 } else { 3455 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL)); 3456 } 3457 if (!scal) { 3458 bb[0] = uthresh; 3459 bb[1] = PETSC_MAX_REAL; 3460 #if defined(PETSC_USE_COMPLEX) 3461 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3462 #else 3463 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3464 #endif 3465 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3466 } 3467 if (recipe_m > 0 && B_N - B_neigs > 0) { 3468 PetscBLASInt B_neigs2 = 0; 3469 3470 B_IL = 1; 3471 PetscCall(PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU)); 3472 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3473 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3474 #if defined(PETSC_USE_COMPLEX) 3475 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3476 #else 3477 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3478 #endif 3479 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3480 B_neigs += B_neigs2; 3481 } 3482 break; 3483 case 4: 3484 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3485 #if defined(PETSC_USE_COMPLEX) 3486 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3487 #else 3488 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3489 #endif 3490 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3491 { 3492 PetscBLASInt B_neigs2 = 0; 3493 3494 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3495 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3496 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3497 #if defined(PETSC_USE_COMPLEX) 3498 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3499 #else 3500 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3501 #endif 3502 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3503 B_neigs += B_neigs2; 3504 } 3505 break; 3506 case 5: /* same as before: first compute all eigenvalues, then filter */ 3507 #if defined(PETSC_USE_COMPLEX) 3508 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3509 #else 3510 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3511 #endif 3512 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3513 { 3514 PetscInt e,k,ne; 3515 for (e=0,ne=0;e<B_neigs;e++) { 3516 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3517 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3518 eigs[ne] = eigs[e]; 3519 ne++; 3520 } 3521 } 3522 PetscCall(PetscArraycpy(eigv,S,B_N*ne)); 3523 B_neigs = ne; 3524 } 3525 break; 3526 default: 3527 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3528 } 3529 } 3530 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3531 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3532 B_IL = 1; 3533 #if defined(PETSC_USE_COMPLEX) 3534 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3535 #else 3536 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3537 #endif 3538 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3539 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3540 PetscInt k; 3541 PetscCheck(sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3542 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax)); 3543 PetscCall(PetscBLASIntCast(nmax,&B_neigs)); 3544 nmin = nmax; 3545 PetscCall(PetscArrayzero(eigv,subset_size*nmax)); 3546 for (k=0;k<nmax;k++) { 3547 eigs[k] = 1./PETSC_SMALL; 3548 eigv[k*(subset_size+1)] = 1.0; 3549 } 3550 } 3551 PetscCall(PetscFPTrapPop()); 3552 if (B_ierr) { 3553 PetscCheckFalse(B_ierr < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3554 else PetscCheckFalse(B_ierr <= B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3555 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3556 } 3557 3558 if (B_neigs > nmax) { 3559 if (pcbddc->dbg_flag) { 3560 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax)); 3561 } 3562 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3563 B_neigs = nmax; 3564 } 3565 3566 nmin_s = PetscMin(nmin,B_N); 3567 if (B_neigs < nmin_s) { 3568 PetscBLASInt B_neigs2 = 0; 3569 3570 if (pcbddc->use_deluxe_scaling) { 3571 if (scal) { 3572 B_IU = nmin_s; 3573 B_IL = B_neigs + 1; 3574 } else { 3575 B_IL = B_N - nmin_s + 1; 3576 B_IU = B_N - B_neigs; 3577 } 3578 } else { 3579 B_IL = B_neigs + 1; 3580 B_IU = nmin_s; 3581 } 3582 if (pcbddc->dbg_flag) { 3583 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU)); 3584 } 3585 if (sub_schurs->is_symmetric) { 3586 PetscInt j,k; 3587 for (j=0;j<subset_size;j++) { 3588 for (k=j;k<subset_size;k++) { 3589 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3590 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3591 } 3592 } 3593 } else { 3594 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3595 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3596 } 3597 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3598 #if defined(PETSC_USE_COMPLEX) 3599 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3600 #else 3601 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3602 #endif 3603 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3604 PetscCall(PetscFPTrapPop()); 3605 B_neigs += B_neigs2; 3606 } 3607 if (B_ierr) { 3608 PetscCheckFalse(B_ierr < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3609 else PetscCheckFalse(B_ierr <= B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3610 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3611 } 3612 if (pcbddc->dbg_flag) { 3613 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs)); 3614 for (j=0;j<B_neigs;j++) { 3615 if (eigs[j] == 0.0) { 3616 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n")); 3617 } else { 3618 if (pcbddc->use_deluxe_scaling) { 3619 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start])); 3620 } else { 3621 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start])); 3622 } 3623 } 3624 } 3625 } 3626 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3627 } 3628 /* change the basis back to the original one */ 3629 if (sub_schurs->change) { 3630 Mat change,phi,phit; 3631 3632 if (pcbddc->dbg_flag > 2) { 3633 PetscInt ii; 3634 for (ii=0;ii<B_neigs;ii++) { 3635 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N)); 3636 for (j=0;j<B_N;j++) { 3637 #if defined(PETSC_USE_COMPLEX) 3638 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3639 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3640 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c)); 3641 #else 3642 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j])); 3643 #endif 3644 } 3645 } 3646 } 3647 PetscCall(KSPGetOperators(sub_schurs->change[i],&change,NULL)); 3648 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit)); 3649 PetscCall(MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi)); 3650 PetscCall(MatCopy(phi,phit,SAME_NONZERO_PATTERN)); 3651 PetscCall(MatDestroy(&phit)); 3652 PetscCall(MatDestroy(&phi)); 3653 } 3654 maxneigs = PetscMax(B_neigs,maxneigs); 3655 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3656 if (B_neigs) { 3657 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size)); 3658 3659 if (pcbddc->dbg_flag > 1) { 3660 PetscInt ii; 3661 for (ii=0;ii<B_neigs;ii++) { 3662 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N)); 3663 for (j=0;j<B_N;j++) { 3664 #if defined(PETSC_USE_COMPLEX) 3665 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3666 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3667 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c)); 3668 #else 3669 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]])); 3670 #endif 3671 } 3672 } 3673 } 3674 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size)); 3675 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3676 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3677 cum++; 3678 } 3679 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i],&idxs)); 3680 /* shift for next computation */ 3681 cumarray += subset_size*subset_size; 3682 } 3683 if (pcbddc->dbg_flag) { 3684 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3685 } 3686 3687 if (mss) { 3688 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3689 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3690 /* destroy matrices (junk) */ 3691 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3692 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3693 } 3694 if (allocated_S_St) { 3695 PetscCall(PetscFree2(S,St)); 3696 } 3697 PetscCall(PetscFree5(eigv,eigs,work,B_iwork,B_ifail)); 3698 #if defined(PETSC_USE_COMPLEX) 3699 PetscCall(PetscFree(rwork)); 3700 #endif 3701 if (pcbddc->dbg_flag) { 3702 PetscInt maxneigs_r; 3703 PetscCall(MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc))); 3704 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r)); 3705 } 3706 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3707 PetscFunctionReturn(0); 3708 } 3709 3710 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3711 { 3712 PetscScalar *coarse_submat_vals; 3713 3714 PetscFunctionBegin; 3715 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3716 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3717 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3718 3719 /* Setup local neumann solver ksp_R */ 3720 /* PCBDDCSetUpLocalScatters should be called first! */ 3721 PetscCall(PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE)); 3722 3723 /* 3724 Setup local correction and local part of coarse basis. 3725 Gives back the dense local part of the coarse matrix in column major ordering 3726 */ 3727 PetscCall(PCBDDCSetUpCorrection(pc,&coarse_submat_vals)); 3728 3729 /* Compute total number of coarse nodes and setup coarse solver */ 3730 PetscCall(PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals)); 3731 3732 /* free */ 3733 PetscCall(PetscFree(coarse_submat_vals)); 3734 PetscFunctionReturn(0); 3735 } 3736 3737 PetscErrorCode PCBDDCResetCustomization(PC pc) 3738 { 3739 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3740 3741 PetscFunctionBegin; 3742 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3743 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3744 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3745 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3746 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3747 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3748 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3749 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3750 PetscCall(PCBDDCSetDofsSplitting(pc,0,NULL)); 3751 PetscCall(PCBDDCSetDofsSplittingLocal(pc,0,NULL)); 3752 PetscFunctionReturn(0); 3753 } 3754 3755 PetscErrorCode PCBDDCResetTopography(PC pc) 3756 { 3757 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3758 PetscInt i; 3759 3760 PetscFunctionBegin; 3761 PetscCall(MatDestroy(&pcbddc->nedcG)); 3762 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3763 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3764 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3765 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3766 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3767 PetscCall(VecDestroy(&pcbddc->work_change)); 3768 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3769 PetscCall(MatDestroy(&pcbddc->divudotp)); 3770 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3771 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3772 for (i=0;i<pcbddc->n_local_subs;i++) { 3773 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3774 } 3775 pcbddc->n_local_subs = 0; 3776 PetscCall(PetscFree(pcbddc->local_subs)); 3777 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3778 pcbddc->graphanalyzed = PETSC_FALSE; 3779 pcbddc->recompute_topography = PETSC_TRUE; 3780 pcbddc->corner_selected = PETSC_FALSE; 3781 PetscFunctionReturn(0); 3782 } 3783 3784 PetscErrorCode PCBDDCResetSolvers(PC pc) 3785 { 3786 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3787 3788 PetscFunctionBegin; 3789 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3790 if (pcbddc->coarse_phi_B) { 3791 PetscScalar *array; 3792 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&array)); 3793 PetscCall(PetscFree(array)); 3794 } 3795 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3796 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3797 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3798 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3799 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3800 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3801 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3802 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3803 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3804 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3805 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3806 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3807 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3808 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3809 PetscCall(KSPReset(pcbddc->ksp_D)); 3810 PetscCall(KSPReset(pcbddc->ksp_R)); 3811 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3812 PetscCall(MatDestroy(&pcbddc->local_mat)); 3813 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3814 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 3815 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3816 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3817 PetscCall(MatDestroy(&pcbddc->benign_change)); 3818 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3819 PetscCall(PCBDDCBenignShellMat(pc,PETSC_TRUE)); 3820 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3821 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3822 if (pcbddc->benign_zerodiag_subs) { 3823 PetscInt i; 3824 for (i=0;i<pcbddc->benign_n;i++) { 3825 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3826 } 3827 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3828 } 3829 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 3830 PetscFunctionReturn(0); 3831 } 3832 3833 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3834 { 3835 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3836 PC_IS *pcis = (PC_IS*)pc->data; 3837 VecType impVecType; 3838 PetscInt n_constraints,n_R,old_size; 3839 3840 PetscFunctionBegin; 3841 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3842 n_R = pcis->n - pcbddc->n_vertices; 3843 PetscCall(VecGetType(pcis->vec1_N,&impVecType)); 3844 /* local work vectors (try to avoid unneeded work)*/ 3845 /* R nodes */ 3846 old_size = -1; 3847 if (pcbddc->vec1_R) { 3848 PetscCall(VecGetSize(pcbddc->vec1_R,&old_size)); 3849 } 3850 if (n_R != old_size) { 3851 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3852 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3853 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R)); 3854 PetscCall(VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R)); 3855 PetscCall(VecSetType(pcbddc->vec1_R,impVecType)); 3856 PetscCall(VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R)); 3857 } 3858 /* local primal dofs */ 3859 old_size = -1; 3860 if (pcbddc->vec1_P) { 3861 PetscCall(VecGetSize(pcbddc->vec1_P,&old_size)); 3862 } 3863 if (pcbddc->local_primal_size != old_size) { 3864 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3865 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P)); 3866 PetscCall(VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size)); 3867 PetscCall(VecSetType(pcbddc->vec1_P,impVecType)); 3868 } 3869 /* local explicit constraints */ 3870 old_size = -1; 3871 if (pcbddc->vec1_C) { 3872 PetscCall(VecGetSize(pcbddc->vec1_C,&old_size)); 3873 } 3874 if (n_constraints && n_constraints != old_size) { 3875 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3876 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C)); 3877 PetscCall(VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints)); 3878 PetscCall(VecSetType(pcbddc->vec1_C,impVecType)); 3879 } 3880 PetscFunctionReturn(0); 3881 } 3882 3883 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3884 { 3885 /* pointers to pcis and pcbddc */ 3886 PC_IS* pcis = (PC_IS*)pc->data; 3887 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3888 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3889 /* submatrices of local problem */ 3890 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3891 /* submatrices of local coarse problem */ 3892 Mat S_VV,S_CV,S_VC,S_CC; 3893 /* working matrices */ 3894 Mat C_CR; 3895 /* additional working stuff */ 3896 PC pc_R; 3897 Mat F,Brhs = NULL; 3898 Vec dummy_vec; 3899 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3900 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3901 PetscScalar *work; 3902 PetscInt *idx_V_B; 3903 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3904 PetscInt i,n_R,n_D,n_B; 3905 PetscScalar one=1.0,m_one=-1.0; 3906 3907 PetscFunctionBegin; 3908 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3909 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 3910 3911 /* Set Non-overlapping dimensions */ 3912 n_vertices = pcbddc->n_vertices; 3913 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3914 n_B = pcis->n_B; 3915 n_D = pcis->n - n_B; 3916 n_R = pcis->n - n_vertices; 3917 3918 /* vertices in boundary numbering */ 3919 PetscCall(PetscMalloc1(n_vertices,&idx_V_B)); 3920 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B)); 3921 PetscCheckFalse(i != n_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3922 3923 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3924 PetscCall(PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals)); 3925 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV)); 3926 PetscCall(MatDenseSetLDA(S_VV,pcbddc->local_primal_size)); 3927 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV)); 3928 PetscCall(MatDenseSetLDA(S_CV,pcbddc->local_primal_size)); 3929 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC)); 3930 PetscCall(MatDenseSetLDA(S_VC,pcbddc->local_primal_size)); 3931 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC)); 3932 PetscCall(MatDenseSetLDA(S_CC,pcbddc->local_primal_size)); 3933 3934 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3935 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_R)); 3936 PetscCall(PCSetUp(pc_R)); 3937 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU)); 3938 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL)); 3939 lda_rhs = n_R; 3940 need_benign_correction = PETSC_FALSE; 3941 if (isLU || isCHOL) { 3942 PetscCall(PCFactorGetMatrix(pc_R,&F)); 3943 } else if (sub_schurs && sub_schurs->reuse_solver) { 3944 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3945 MatFactorType type; 3946 3947 F = reuse_solver->F; 3948 PetscCall(MatGetFactorType(F,&type)); 3949 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3950 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3951 PetscCall(MatGetSize(F,&lda_rhs,NULL)); 3952 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3953 } else F = NULL; 3954 3955 /* determine if we can use a sparse right-hand side */ 3956 sparserhs = PETSC_FALSE; 3957 if (F) { 3958 MatSolverType solver; 3959 3960 PetscCall(MatFactorGetSolverType(F,&solver)); 3961 PetscCall(PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs)); 3962 } 3963 3964 /* allocate workspace */ 3965 n = 0; 3966 if (n_constraints) { 3967 n += lda_rhs*n_constraints; 3968 } 3969 if (n_vertices) { 3970 n = PetscMax(2*lda_rhs*n_vertices,n); 3971 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3972 } 3973 if (!pcbddc->symmetric_primal) { 3974 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3975 } 3976 PetscCall(PetscMalloc1(n,&work)); 3977 3978 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3979 dummy_vec = NULL; 3980 if (need_benign_correction && lda_rhs != n_R && F) { 3981 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec)); 3982 PetscCall(VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE)); 3983 PetscCall(VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name)); 3984 } 3985 3986 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3987 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3988 3989 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3990 if (n_constraints) { 3991 Mat M3,C_B; 3992 IS is_aux; 3993 3994 /* Extract constraints on R nodes: C_{CR} */ 3995 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux)); 3996 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR)); 3997 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 3998 3999 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4000 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4001 if (!sparserhs) { 4002 PetscCall(PetscArrayzero(work,lda_rhs*n_constraints)); 4003 for (i=0;i<n_constraints;i++) { 4004 const PetscScalar *row_cmat_values; 4005 const PetscInt *row_cmat_indices; 4006 PetscInt size_of_constraint,j; 4007 4008 PetscCall(MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 4009 for (j=0;j<size_of_constraint;j++) { 4010 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4011 } 4012 PetscCall(MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 4013 } 4014 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs)); 4015 } else { 4016 Mat tC_CR; 4017 4018 PetscCall(MatScale(C_CR,-1.0)); 4019 if (lda_rhs != n_R) { 4020 PetscScalar *aa; 4021 PetscInt r,*ii,*jj; 4022 PetscBool done; 4023 4024 PetscCall(MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4025 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4026 PetscCall(MatSeqAIJGetArray(C_CR,&aa)); 4027 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR)); 4028 PetscCall(MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4029 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4030 } else { 4031 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4032 tC_CR = C_CR; 4033 } 4034 PetscCall(MatCreateTranspose(tC_CR,&Brhs)); 4035 PetscCall(MatDestroy(&tC_CR)); 4036 } 4037 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R)); 4038 if (F) { 4039 if (need_benign_correction) { 4040 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4041 4042 /* rhs is already zero on interior dofs, no need to change the rhs */ 4043 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n)); 4044 } 4045 PetscCall(MatMatSolve(F,Brhs,local_auxmat2_R)); 4046 if (need_benign_correction) { 4047 PetscScalar *marr; 4048 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4049 4050 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4051 if (lda_rhs != n_R) { 4052 for (i=0;i<n_constraints;i++) { 4053 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4054 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4055 PetscCall(VecResetArray(dummy_vec)); 4056 } 4057 } else { 4058 for (i=0;i<n_constraints;i++) { 4059 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4060 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4061 PetscCall(VecResetArray(pcbddc->vec1_R)); 4062 } 4063 } 4064 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4065 } 4066 } else { 4067 PetscScalar *marr; 4068 4069 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4070 for (i=0;i<n_constraints;i++) { 4071 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4072 PetscCall(VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs)); 4073 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4074 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4075 PetscCall(VecResetArray(pcbddc->vec1_R)); 4076 PetscCall(VecResetArray(pcbddc->vec2_R)); 4077 } 4078 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4079 } 4080 if (sparserhs) { 4081 PetscCall(MatScale(C_CR,-1.0)); 4082 } 4083 PetscCall(MatDestroy(&Brhs)); 4084 if (!pcbddc->switch_static) { 4085 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2)); 4086 for (i=0;i<n_constraints;i++) { 4087 Vec r, b; 4088 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R,i,&r)); 4089 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2,i,&b)); 4090 PetscCall(VecScatterBegin(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4091 PetscCall(VecScatterEnd(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4092 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2,i,&b)); 4093 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R,i,&r)); 4094 } 4095 PetscCall(MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4096 } else { 4097 if (lda_rhs != n_R) { 4098 IS dummy; 4099 4100 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy)); 4101 PetscCall(MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2)); 4102 PetscCall(ISDestroy(&dummy)); 4103 } else { 4104 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4105 pcbddc->local_auxmat2 = local_auxmat2_R; 4106 } 4107 PetscCall(MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4108 } 4109 PetscCall(ISDestroy(&is_aux)); 4110 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4111 PetscCall(MatScale(M3,m_one)); 4112 if (isCHOL) { 4113 PetscCall(MatCholeskyFactor(M3,NULL,NULL)); 4114 } else { 4115 PetscCall(MatLUFactor(M3,NULL,NULL,NULL)); 4116 } 4117 PetscCall(MatSeqDenseInvertFactors_Private(M3)); 4118 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4119 PetscCall(MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1)); 4120 PetscCall(MatDestroy(&C_B)); 4121 PetscCall(MatCopy(M3,S_CC,SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4122 PetscCall(MatDestroy(&M3)); 4123 } 4124 4125 /* Get submatrices from subdomain matrix */ 4126 if (n_vertices) { 4127 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4128 PetscBool oldpin; 4129 #endif 4130 PetscBool isaij; 4131 IS is_aux; 4132 4133 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4134 IS tis; 4135 4136 PetscCall(ISDuplicate(pcbddc->is_R_local,&tis)); 4137 PetscCall(ISSort(tis)); 4138 PetscCall(ISComplement(tis,0,pcis->n,&is_aux)); 4139 PetscCall(ISDestroy(&tis)); 4140 } else { 4141 PetscCall(ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux)); 4142 } 4143 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4144 oldpin = pcbddc->local_mat->boundtocpu; 4145 #endif 4146 PetscCall(MatBindToCPU(pcbddc->local_mat,PETSC_TRUE)); 4147 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV)); 4148 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR)); 4149 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij)); 4150 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4151 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4152 } 4153 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV)); 4154 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4155 PetscCall(MatBindToCPU(pcbddc->local_mat,oldpin)); 4156 #endif 4157 PetscCall(ISDestroy(&is_aux)); 4158 } 4159 4160 /* Matrix of coarse basis functions (local) */ 4161 if (pcbddc->coarse_phi_B) { 4162 PetscInt on_B,on_primal,on_D=n_D; 4163 if (pcbddc->coarse_phi_D) { 4164 PetscCall(MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL)); 4165 } 4166 PetscCall(MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal)); 4167 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4168 PetscScalar *marray; 4169 4170 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&marray)); 4171 PetscCall(PetscFree(marray)); 4172 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4173 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4174 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4175 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4176 } 4177 } 4178 4179 if (!pcbddc->coarse_phi_B) { 4180 PetscScalar *marr; 4181 4182 /* memory size */ 4183 n = n_B*pcbddc->local_primal_size; 4184 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4185 if (!pcbddc->symmetric_primal) n *= 2; 4186 PetscCall(PetscCalloc1(n,&marr)); 4187 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B)); 4188 marr += n_B*pcbddc->local_primal_size; 4189 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4190 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D)); 4191 marr += n_D*pcbddc->local_primal_size; 4192 } 4193 if (!pcbddc->symmetric_primal) { 4194 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B)); 4195 marr += n_B*pcbddc->local_primal_size; 4196 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4197 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D)); 4198 } 4199 } else { 4200 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4201 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4202 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4203 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4204 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4205 } 4206 } 4207 } 4208 4209 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4210 p0_lidx_I = NULL; 4211 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4212 const PetscInt *idxs; 4213 4214 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 4215 PetscCall(PetscMalloc1(pcbddc->benign_n,&p0_lidx_I)); 4216 for (i=0;i<pcbddc->benign_n;i++) { 4217 PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i])); 4218 } 4219 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 4220 } 4221 4222 /* vertices */ 4223 if (n_vertices) { 4224 PetscBool restoreavr = PETSC_FALSE; 4225 4226 PetscCall(MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV)); 4227 4228 if (n_R) { 4229 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4230 PetscBLASInt B_N,B_one = 1; 4231 const PetscScalar *x; 4232 PetscScalar *y; 4233 4234 PetscCall(MatScale(A_RV,m_one)); 4235 if (need_benign_correction) { 4236 ISLocalToGlobalMapping RtoN; 4237 IS is_p0; 4238 PetscInt *idxs_p0,n; 4239 4240 PetscCall(PetscMalloc1(pcbddc->benign_n,&idxs_p0)); 4241 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN)); 4242 PetscCall(ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0)); 4243 PetscCheckFalse(n != pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4244 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4245 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0)); 4246 PetscCall(MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr)); 4247 PetscCall(ISDestroy(&is_p0)); 4248 } 4249 4250 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV)); 4251 if (!sparserhs || need_benign_correction) { 4252 if (lda_rhs == n_R) { 4253 PetscCall(MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV)); 4254 } else { 4255 PetscScalar *av,*array; 4256 const PetscInt *xadj,*adjncy; 4257 PetscInt n; 4258 PetscBool flg_row; 4259 4260 array = work+lda_rhs*n_vertices; 4261 PetscCall(PetscArrayzero(array,lda_rhs*n_vertices)); 4262 PetscCall(MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV)); 4263 PetscCall(MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4264 PetscCall(MatSeqAIJGetArray(A_RV,&av)); 4265 for (i=0;i<n;i++) { 4266 PetscInt j; 4267 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4268 } 4269 PetscCall(MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4270 PetscCall(MatDestroy(&A_RV)); 4271 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV)); 4272 } 4273 if (need_benign_correction) { 4274 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4275 PetscScalar *marr; 4276 4277 PetscCall(MatDenseGetArray(A_RV,&marr)); 4278 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4279 4280 | 0 0 0 | (V) 4281 L = | 0 0 -1 | (P-p0) 4282 | 0 0 -1 | (p0) 4283 4284 */ 4285 for (i=0;i<reuse_solver->benign_n;i++) { 4286 const PetscScalar *vals; 4287 const PetscInt *idxs,*idxs_zero; 4288 PetscInt n,j,nz; 4289 4290 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4291 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4292 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4293 for (j=0;j<n;j++) { 4294 PetscScalar val = vals[j]; 4295 PetscInt k,col = idxs[j]; 4296 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4297 } 4298 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4299 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4300 } 4301 PetscCall(MatDenseRestoreArray(A_RV,&marr)); 4302 } 4303 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4304 Brhs = A_RV; 4305 } else { 4306 Mat tA_RVT,A_RVT; 4307 4308 if (!pcbddc->symmetric_primal) { 4309 /* A_RV already scaled by -1 */ 4310 PetscCall(MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT)); 4311 } else { 4312 restoreavr = PETSC_TRUE; 4313 PetscCall(MatScale(A_VR,-1.0)); 4314 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4315 A_RVT = A_VR; 4316 } 4317 if (lda_rhs != n_R) { 4318 PetscScalar *aa; 4319 PetscInt r,*ii,*jj; 4320 PetscBool done; 4321 4322 PetscCall(MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4323 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4324 PetscCall(MatSeqAIJGetArray(A_RVT,&aa)); 4325 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT)); 4326 PetscCall(MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4327 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4328 } else { 4329 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4330 tA_RVT = A_RVT; 4331 } 4332 PetscCall(MatCreateTranspose(tA_RVT,&Brhs)); 4333 PetscCall(MatDestroy(&tA_RVT)); 4334 PetscCall(MatDestroy(&A_RVT)); 4335 } 4336 if (F) { 4337 /* need to correct the rhs */ 4338 if (need_benign_correction) { 4339 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4340 PetscScalar *marr; 4341 4342 PetscCall(MatDenseGetArray(Brhs,&marr)); 4343 if (lda_rhs != n_R) { 4344 for (i=0;i<n_vertices;i++) { 4345 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4346 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE)); 4347 PetscCall(VecResetArray(dummy_vec)); 4348 } 4349 } else { 4350 for (i=0;i<n_vertices;i++) { 4351 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4352 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE)); 4353 PetscCall(VecResetArray(pcbddc->vec1_R)); 4354 } 4355 } 4356 PetscCall(MatDenseRestoreArray(Brhs,&marr)); 4357 } 4358 PetscCall(MatMatSolve(F,Brhs,A_RRmA_RV)); 4359 if (restoreavr) { 4360 PetscCall(MatScale(A_VR,-1.0)); 4361 } 4362 /* need to correct the solution */ 4363 if (need_benign_correction) { 4364 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4365 PetscScalar *marr; 4366 4367 PetscCall(MatDenseGetArray(A_RRmA_RV,&marr)); 4368 if (lda_rhs != n_R) { 4369 for (i=0;i<n_vertices;i++) { 4370 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4371 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4372 PetscCall(VecResetArray(dummy_vec)); 4373 } 4374 } else { 4375 for (i=0;i<n_vertices;i++) { 4376 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4377 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4378 PetscCall(VecResetArray(pcbddc->vec1_R)); 4379 } 4380 } 4381 PetscCall(MatDenseRestoreArray(A_RRmA_RV,&marr)); 4382 } 4383 } else { 4384 PetscCall(MatDenseGetArray(Brhs,&y)); 4385 for (i=0;i<n_vertices;i++) { 4386 PetscCall(VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs)); 4387 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs)); 4388 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4389 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4390 PetscCall(VecResetArray(pcbddc->vec1_R)); 4391 PetscCall(VecResetArray(pcbddc->vec2_R)); 4392 } 4393 PetscCall(MatDenseRestoreArray(Brhs,&y)); 4394 } 4395 PetscCall(MatDestroy(&A_RV)); 4396 PetscCall(MatDestroy(&Brhs)); 4397 /* S_VV and S_CV */ 4398 if (n_constraints) { 4399 Mat B; 4400 4401 PetscCall(PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices)); 4402 for (i=0;i<n_vertices;i++) { 4403 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4404 PetscCall(VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B)); 4405 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4406 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4407 PetscCall(VecResetArray(pcis->vec1_B)); 4408 PetscCall(VecResetArray(pcbddc->vec1_R)); 4409 } 4410 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B)); 4411 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4412 PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV)); 4413 PetscCall(MatProductSetType(S_CV,MATPRODUCT_AB)); 4414 PetscCall(MatProductSetFromOptions(S_CV)); 4415 PetscCall(MatProductSymbolic(S_CV)); 4416 PetscCall(MatProductNumeric(S_CV)); 4417 PetscCall(MatProductClear(S_CV)); 4418 4419 PetscCall(MatDestroy(&B)); 4420 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B)); 4421 /* Reuse B = local_auxmat2_R * S_CV */ 4422 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B)); 4423 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4424 PetscCall(MatProductSetFromOptions(B)); 4425 PetscCall(MatProductSymbolic(B)); 4426 PetscCall(MatProductNumeric(B)); 4427 4428 PetscCall(MatScale(S_CV,m_one)); 4429 PetscCall(PetscBLASIntCast(lda_rhs*n_vertices,&B_N)); 4430 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4431 PetscCall(MatDestroy(&B)); 4432 } 4433 if (lda_rhs != n_R) { 4434 PetscCall(MatDestroy(&A_RRmA_RV)); 4435 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV)); 4436 PetscCall(MatDenseSetLDA(A_RRmA_RV,lda_rhs)); 4437 } 4438 PetscCall(MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt)); 4439 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4440 if (need_benign_correction) { 4441 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4442 PetscScalar *marr,*sums; 4443 4444 PetscCall(PetscMalloc1(n_vertices,&sums)); 4445 PetscCall(MatDenseGetArray(S_VVt,&marr)); 4446 for (i=0;i<reuse_solver->benign_n;i++) { 4447 const PetscScalar *vals; 4448 const PetscInt *idxs,*idxs_zero; 4449 PetscInt n,j,nz; 4450 4451 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4452 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4453 for (j=0;j<n_vertices;j++) { 4454 PetscInt k; 4455 sums[j] = 0.; 4456 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4457 } 4458 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4459 for (j=0;j<n;j++) { 4460 PetscScalar val = vals[j]; 4461 PetscInt k; 4462 for (k=0;k<n_vertices;k++) { 4463 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4464 } 4465 } 4466 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4467 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4468 } 4469 PetscCall(PetscFree(sums)); 4470 PetscCall(MatDenseRestoreArray(S_VVt,&marr)); 4471 PetscCall(MatDestroy(&A_RV_bcorr)); 4472 } 4473 PetscCall(MatDestroy(&A_RRmA_RV)); 4474 PetscCall(PetscBLASIntCast(n_vertices*n_vertices,&B_N)); 4475 PetscCall(MatDenseGetArrayRead(A_VV,&x)); 4476 PetscCall(MatDenseGetArray(S_VVt,&y)); 4477 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4478 PetscCall(MatDenseRestoreArrayRead(A_VV,&x)); 4479 PetscCall(MatDenseRestoreArray(S_VVt,&y)); 4480 PetscCall(MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN)); 4481 PetscCall(MatDestroy(&S_VVt)); 4482 } else { 4483 PetscCall(MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN)); 4484 } 4485 PetscCall(MatDestroy(&A_VV)); 4486 4487 /* coarse basis functions */ 4488 for (i=0;i<n_vertices;i++) { 4489 Vec v; 4490 PetscScalar one = 1.0,zero = 0.0; 4491 4492 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4493 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i,&v)); 4494 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4495 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4496 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4497 PetscMPIInt rank; 4498 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),&rank)); 4499 PetscCheckFalse(rank > 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4500 } 4501 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4502 PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4503 PetscCall(VecAssemblyEnd(v)); 4504 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i,&v)); 4505 4506 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4507 PetscInt j; 4508 4509 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i,&v)); 4510 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4511 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4512 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4513 PetscMPIInt rank; 4514 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),&rank)); 4515 PetscCheckFalse(rank > 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4516 } 4517 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4518 PetscCall(VecAssemblyBegin(v)); 4519 PetscCall(VecAssemblyEnd(v)); 4520 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i,&v)); 4521 } 4522 PetscCall(VecResetArray(pcbddc->vec1_R)); 4523 } 4524 /* if n_R == 0 the object is not destroyed */ 4525 PetscCall(MatDestroy(&A_RV)); 4526 } 4527 PetscCall(VecDestroy(&dummy_vec)); 4528 4529 if (n_constraints) { 4530 Mat B; 4531 4532 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B)); 4533 PetscCall(MatScale(S_CC,m_one)); 4534 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B)); 4535 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4536 PetscCall(MatProductSetFromOptions(B)); 4537 PetscCall(MatProductSymbolic(B)); 4538 PetscCall(MatProductNumeric(B)); 4539 4540 PetscCall(MatScale(S_CC,m_one)); 4541 if (n_vertices) { 4542 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4543 PetscCall(MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC)); 4544 } else { 4545 Mat S_VCt; 4546 4547 if (lda_rhs != n_R) { 4548 PetscCall(MatDestroy(&B)); 4549 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B)); 4550 PetscCall(MatDenseSetLDA(B,lda_rhs)); 4551 } 4552 PetscCall(MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt)); 4553 PetscCall(MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN)); 4554 PetscCall(MatDestroy(&S_VCt)); 4555 } 4556 } 4557 PetscCall(MatDestroy(&B)); 4558 /* coarse basis functions */ 4559 for (i=0;i<n_constraints;i++) { 4560 Vec v; 4561 4562 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4563 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4564 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4565 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4566 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4567 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4568 PetscInt j; 4569 PetscScalar zero = 0.0; 4570 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4571 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4572 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4573 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4574 PetscCall(VecAssemblyBegin(v)); 4575 PetscCall(VecAssemblyEnd(v)); 4576 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4577 } 4578 PetscCall(VecResetArray(pcbddc->vec1_R)); 4579 } 4580 } 4581 if (n_constraints) { 4582 PetscCall(MatDestroy(&local_auxmat2_R)); 4583 } 4584 PetscCall(PetscFree(p0_lidx_I)); 4585 4586 /* coarse matrix entries relative to B_0 */ 4587 if (pcbddc->benign_n) { 4588 Mat B0_B,B0_BPHI; 4589 IS is_dummy; 4590 const PetscScalar *data; 4591 PetscInt j; 4592 4593 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4594 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4595 PetscCall(ISDestroy(&is_dummy)); 4596 PetscCall(MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4597 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4598 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data)); 4599 for (j=0;j<pcbddc->benign_n;j++) { 4600 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4601 for (i=0;i<pcbddc->local_primal_size;i++) { 4602 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4603 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4604 } 4605 } 4606 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data)); 4607 PetscCall(MatDestroy(&B0_B)); 4608 PetscCall(MatDestroy(&B0_BPHI)); 4609 } 4610 4611 /* compute other basis functions for non-symmetric problems */ 4612 if (!pcbddc->symmetric_primal) { 4613 Mat B_V=NULL,B_C=NULL; 4614 PetscScalar *marray; 4615 4616 if (n_constraints) { 4617 Mat S_CCT,C_CRT; 4618 4619 PetscCall(MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT)); 4620 PetscCall(MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT)); 4621 PetscCall(MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C)); 4622 PetscCall(MatDestroy(&S_CCT)); 4623 if (n_vertices) { 4624 Mat S_VCT; 4625 4626 PetscCall(MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT)); 4627 PetscCall(MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V)); 4628 PetscCall(MatDestroy(&S_VCT)); 4629 } 4630 PetscCall(MatDestroy(&C_CRT)); 4631 } else { 4632 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V)); 4633 } 4634 if (n_vertices && n_R) { 4635 PetscScalar *av,*marray; 4636 const PetscInt *xadj,*adjncy; 4637 PetscInt n; 4638 PetscBool flg_row; 4639 4640 /* B_V = B_V - A_VR^T */ 4641 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4642 PetscCall(MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4643 PetscCall(MatSeqAIJGetArray(A_VR,&av)); 4644 PetscCall(MatDenseGetArray(B_V,&marray)); 4645 for (i=0;i<n;i++) { 4646 PetscInt j; 4647 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4648 } 4649 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4650 PetscCall(MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4651 PetscCall(MatDestroy(&A_VR)); 4652 } 4653 4654 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4655 if (n_vertices) { 4656 PetscCall(MatDenseGetArray(B_V,&marray)); 4657 for (i=0;i<n_vertices;i++) { 4658 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+i*n_R)); 4659 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4660 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4661 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4662 PetscCall(VecResetArray(pcbddc->vec1_R)); 4663 PetscCall(VecResetArray(pcbddc->vec2_R)); 4664 } 4665 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4666 } 4667 if (B_C) { 4668 PetscCall(MatDenseGetArray(B_C,&marray)); 4669 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4670 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R)); 4671 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4672 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4673 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4674 PetscCall(VecResetArray(pcbddc->vec1_R)); 4675 PetscCall(VecResetArray(pcbddc->vec2_R)); 4676 } 4677 PetscCall(MatDenseRestoreArray(B_C,&marray)); 4678 } 4679 /* coarse basis functions */ 4680 for (i=0;i<pcbddc->local_primal_size;i++) { 4681 Vec v; 4682 4683 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*n_R)); 4684 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B,i,&v)); 4685 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4686 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4687 if (i<n_vertices) { 4688 PetscScalar one = 1.0; 4689 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4690 PetscCall(VecAssemblyBegin(v)); 4691 PetscCall(VecAssemblyEnd(v)); 4692 } 4693 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B,i,&v)); 4694 4695 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4696 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D,i,&v)); 4697 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4698 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4699 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D,i,&v)); 4700 } 4701 PetscCall(VecResetArray(pcbddc->vec1_R)); 4702 } 4703 PetscCall(MatDestroy(&B_V)); 4704 PetscCall(MatDestroy(&B_C)); 4705 } 4706 4707 /* free memory */ 4708 PetscCall(PetscFree(idx_V_B)); 4709 PetscCall(MatDestroy(&S_VV)); 4710 PetscCall(MatDestroy(&S_CV)); 4711 PetscCall(MatDestroy(&S_VC)); 4712 PetscCall(MatDestroy(&S_CC)); 4713 PetscCall(PetscFree(work)); 4714 if (n_vertices) { 4715 PetscCall(MatDestroy(&A_VR)); 4716 } 4717 if (n_constraints) { 4718 PetscCall(MatDestroy(&C_CR)); 4719 } 4720 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 4721 4722 /* Checking coarse_sub_mat and coarse basis functios */ 4723 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4724 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4725 if (pcbddc->dbg_flag) { 4726 Mat coarse_sub_mat; 4727 Mat AUXMAT,TM1,TM2,TM3,TM4; 4728 Mat coarse_phi_D,coarse_phi_B; 4729 Mat coarse_psi_D,coarse_psi_B; 4730 Mat A_II,A_BB,A_IB,A_BI; 4731 Mat C_B,CPHI; 4732 IS is_dummy; 4733 Vec mones; 4734 MatType checkmattype=MATSEQAIJ; 4735 PetscReal real_value; 4736 4737 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4738 Mat A; 4739 PetscCall(PCBDDCBenignProject(pc,NULL,NULL,&A)); 4740 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II)); 4741 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB)); 4742 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI)); 4743 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB)); 4744 PetscCall(MatDestroy(&A)); 4745 } else { 4746 PetscCall(MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II)); 4747 PetscCall(MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB)); 4748 PetscCall(MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI)); 4749 PetscCall(MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB)); 4750 } 4751 PetscCall(MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D)); 4752 PetscCall(MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B)); 4753 if (!pcbddc->symmetric_primal) { 4754 PetscCall(MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D)); 4755 PetscCall(MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B)); 4756 } 4757 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat)); 4758 4759 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 4760 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal)); 4761 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4762 if (!pcbddc->symmetric_primal) { 4763 PetscCall(MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4764 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1)); 4765 PetscCall(MatDestroy(&AUXMAT)); 4766 PetscCall(MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4767 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2)); 4768 PetscCall(MatDestroy(&AUXMAT)); 4769 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4770 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4771 PetscCall(MatDestroy(&AUXMAT)); 4772 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4773 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4774 PetscCall(MatDestroy(&AUXMAT)); 4775 } else { 4776 PetscCall(MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1)); 4777 PetscCall(MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2)); 4778 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4779 PetscCall(MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4780 PetscCall(MatDestroy(&AUXMAT)); 4781 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4782 PetscCall(MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4783 PetscCall(MatDestroy(&AUXMAT)); 4784 } 4785 PetscCall(MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN)); 4786 PetscCall(MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN)); 4787 PetscCall(MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN)); 4788 PetscCall(MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1)); 4789 if (pcbddc->benign_n) { 4790 Mat B0_B,B0_BPHI; 4791 const PetscScalar *data2; 4792 PetscScalar *data; 4793 PetscInt j; 4794 4795 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4796 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4797 PetscCall(MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4798 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4799 PetscCall(MatDenseGetArray(TM1,&data)); 4800 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data2)); 4801 for (j=0;j<pcbddc->benign_n;j++) { 4802 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4803 for (i=0;i<pcbddc->local_primal_size;i++) { 4804 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4805 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4806 } 4807 } 4808 PetscCall(MatDenseRestoreArray(TM1,&data)); 4809 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data2)); 4810 PetscCall(MatDestroy(&B0_B)); 4811 PetscCall(ISDestroy(&is_dummy)); 4812 PetscCall(MatDestroy(&B0_BPHI)); 4813 } 4814 #if 0 4815 { 4816 PetscViewer viewer; 4817 char filename[256]; 4818 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4819 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4820 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4821 PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4822 PetscCall(MatView(coarse_sub_mat,viewer)); 4823 PetscCall(PetscObjectSetName((PetscObject)TM1,"projected")); 4824 PetscCall(MatView(TM1,viewer)); 4825 if (pcbddc->coarse_phi_B) { 4826 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4827 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 4828 } 4829 if (pcbddc->coarse_phi_D) { 4830 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4831 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 4832 } 4833 if (pcbddc->coarse_psi_B) { 4834 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4835 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 4836 } 4837 if (pcbddc->coarse_psi_D) { 4838 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4839 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 4840 } 4841 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4842 PetscCall(MatView(pcbddc->local_mat,viewer)); 4843 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4844 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 4845 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4846 PetscCall(ISView(pcis->is_I_local,viewer)); 4847 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4848 PetscCall(ISView(pcis->is_B_local,viewer)); 4849 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4850 PetscCall(ISView(pcbddc->is_R_local,viewer)); 4851 PetscCall(PetscViewerDestroy(&viewer)); 4852 } 4853 #endif 4854 PetscCall(MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN)); 4855 PetscCall(MatNorm(TM1,NORM_FROBENIUS,&real_value)); 4856 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4857 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value)); 4858 4859 /* check constraints */ 4860 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy)); 4861 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 4862 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4863 PetscCall(MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4864 } else { 4865 PetscScalar *data; 4866 Mat tmat; 4867 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&data)); 4868 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat)); 4869 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B,&data)); 4870 PetscCall(MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4871 PetscCall(MatDestroy(&tmat)); 4872 } 4873 PetscCall(MatCreateVecs(CPHI,&mones,NULL)); 4874 PetscCall(VecSet(mones,-1.0)); 4875 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4876 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4877 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value)); 4878 if (!pcbddc->symmetric_primal) { 4879 PetscCall(MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI)); 4880 PetscCall(VecSet(mones,-1.0)); 4881 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4882 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4883 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value)); 4884 } 4885 PetscCall(MatDestroy(&C_B)); 4886 PetscCall(MatDestroy(&CPHI)); 4887 PetscCall(ISDestroy(&is_dummy)); 4888 PetscCall(VecDestroy(&mones)); 4889 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4890 PetscCall(MatDestroy(&A_II)); 4891 PetscCall(MatDestroy(&A_BB)); 4892 PetscCall(MatDestroy(&A_IB)); 4893 PetscCall(MatDestroy(&A_BI)); 4894 PetscCall(MatDestroy(&TM1)); 4895 PetscCall(MatDestroy(&TM2)); 4896 PetscCall(MatDestroy(&TM3)); 4897 PetscCall(MatDestroy(&TM4)); 4898 PetscCall(MatDestroy(&coarse_phi_D)); 4899 PetscCall(MatDestroy(&coarse_phi_B)); 4900 if (!pcbddc->symmetric_primal) { 4901 PetscCall(MatDestroy(&coarse_psi_D)); 4902 PetscCall(MatDestroy(&coarse_psi_B)); 4903 } 4904 PetscCall(MatDestroy(&coarse_sub_mat)); 4905 } 4906 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4907 { 4908 PetscBool gpu; 4909 4910 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu)); 4911 if (gpu) { 4912 if (pcbddc->local_auxmat1) { 4913 PetscCall(MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1)); 4914 } 4915 if (pcbddc->local_auxmat2) { 4916 PetscCall(MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2)); 4917 } 4918 if (pcbddc->coarse_phi_B) { 4919 PetscCall(MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B)); 4920 } 4921 if (pcbddc->coarse_phi_D) { 4922 PetscCall(MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D)); 4923 } 4924 if (pcbddc->coarse_psi_B) { 4925 PetscCall(MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B)); 4926 } 4927 if (pcbddc->coarse_psi_D) { 4928 PetscCall(MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D)); 4929 } 4930 } 4931 } 4932 /* get back data */ 4933 *coarse_submat_vals_n = coarse_submat_vals; 4934 PetscFunctionReturn(0); 4935 } 4936 4937 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4938 { 4939 Mat *work_mat; 4940 IS isrow_s,iscol_s; 4941 PetscBool rsorted,csorted; 4942 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4943 4944 PetscFunctionBegin; 4945 PetscCall(ISSorted(isrow,&rsorted)); 4946 PetscCall(ISSorted(iscol,&csorted)); 4947 PetscCall(ISGetLocalSize(isrow,&rsize)); 4948 PetscCall(ISGetLocalSize(iscol,&csize)); 4949 4950 if (!rsorted) { 4951 const PetscInt *idxs; 4952 PetscInt *idxs_sorted,i; 4953 4954 PetscCall(PetscMalloc1(rsize,&idxs_perm_r)); 4955 PetscCall(PetscMalloc1(rsize,&idxs_sorted)); 4956 for (i=0;i<rsize;i++) { 4957 idxs_perm_r[i] = i; 4958 } 4959 PetscCall(ISGetIndices(isrow,&idxs)); 4960 PetscCall(PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r)); 4961 for (i=0;i<rsize;i++) { 4962 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4963 } 4964 PetscCall(ISRestoreIndices(isrow,&idxs)); 4965 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s)); 4966 } else { 4967 PetscCall(PetscObjectReference((PetscObject)isrow)); 4968 isrow_s = isrow; 4969 } 4970 4971 if (!csorted) { 4972 if (isrow == iscol) { 4973 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 4974 iscol_s = isrow_s; 4975 } else { 4976 const PetscInt *idxs; 4977 PetscInt *idxs_sorted,i; 4978 4979 PetscCall(PetscMalloc1(csize,&idxs_perm_c)); 4980 PetscCall(PetscMalloc1(csize,&idxs_sorted)); 4981 for (i=0;i<csize;i++) { 4982 idxs_perm_c[i] = i; 4983 } 4984 PetscCall(ISGetIndices(iscol,&idxs)); 4985 PetscCall(PetscSortIntWithPermutation(csize,idxs,idxs_perm_c)); 4986 for (i=0;i<csize;i++) { 4987 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4988 } 4989 PetscCall(ISRestoreIndices(iscol,&idxs)); 4990 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s)); 4991 } 4992 } else { 4993 PetscCall(PetscObjectReference((PetscObject)iscol)); 4994 iscol_s = iscol; 4995 } 4996 4997 PetscCall(MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat)); 4998 4999 if (!rsorted || !csorted) { 5000 Mat new_mat; 5001 IS is_perm_r,is_perm_c; 5002 5003 if (!rsorted) { 5004 PetscInt *idxs_r,i; 5005 PetscCall(PetscMalloc1(rsize,&idxs_r)); 5006 for (i=0;i<rsize;i++) { 5007 idxs_r[idxs_perm_r[i]] = i; 5008 } 5009 PetscCall(PetscFree(idxs_perm_r)); 5010 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r)); 5011 } else { 5012 PetscCall(ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r)); 5013 } 5014 PetscCall(ISSetPermutation(is_perm_r)); 5015 5016 if (!csorted) { 5017 if (isrow_s == iscol_s) { 5018 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5019 is_perm_c = is_perm_r; 5020 } else { 5021 PetscInt *idxs_c,i; 5022 PetscCheck(idxs_perm_c,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5023 PetscCall(PetscMalloc1(csize,&idxs_c)); 5024 for (i=0;i<csize;i++) { 5025 idxs_c[idxs_perm_c[i]] = i; 5026 } 5027 PetscCall(PetscFree(idxs_perm_c)); 5028 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c)); 5029 } 5030 } else { 5031 PetscCall(ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c)); 5032 } 5033 PetscCall(ISSetPermutation(is_perm_c)); 5034 5035 PetscCall(MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat)); 5036 PetscCall(MatDestroy(&work_mat[0])); 5037 work_mat[0] = new_mat; 5038 PetscCall(ISDestroy(&is_perm_r)); 5039 PetscCall(ISDestroy(&is_perm_c)); 5040 } 5041 5042 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5043 *B = work_mat[0]; 5044 PetscCall(MatDestroyMatrices(1,&work_mat)); 5045 PetscCall(ISDestroy(&isrow_s)); 5046 PetscCall(ISDestroy(&iscol_s)); 5047 PetscFunctionReturn(0); 5048 } 5049 5050 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5051 { 5052 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5053 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5054 Mat new_mat,lA; 5055 IS is_local,is_global; 5056 PetscInt local_size; 5057 PetscBool isseqaij; 5058 5059 PetscFunctionBegin; 5060 PetscCall(MatDestroy(&pcbddc->local_mat)); 5061 PetscCall(MatGetSize(matis->A,&local_size,NULL)); 5062 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local)); 5063 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping,is_local,&is_global)); 5064 PetscCall(ISDestroy(&is_local)); 5065 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat)); 5066 PetscCall(ISDestroy(&is_global)); 5067 5068 if (pcbddc->dbg_flag) { 5069 Vec x,x_change; 5070 PetscReal error; 5071 5072 PetscCall(MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change)); 5073 PetscCall(VecSetRandom(x,NULL)); 5074 PetscCall(MatMult(ChangeOfBasisMatrix,x,x_change)); 5075 PetscCall(VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5076 PetscCall(VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5077 PetscCall(MatMult(new_mat,matis->x,matis->y)); 5078 if (!pcbddc->change_interior) { 5079 const PetscScalar *x,*y,*v; 5080 PetscReal lerror = 0.; 5081 PetscInt i; 5082 5083 PetscCall(VecGetArrayRead(matis->x,&x)); 5084 PetscCall(VecGetArrayRead(matis->y,&y)); 5085 PetscCall(VecGetArrayRead(matis->counter,&v)); 5086 for (i=0;i<local_size;i++) 5087 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5088 lerror = PetscAbsScalar(x[i]-y[i]); 5089 PetscCall(VecRestoreArrayRead(matis->x,&x)); 5090 PetscCall(VecRestoreArrayRead(matis->y,&y)); 5091 PetscCall(VecRestoreArrayRead(matis->counter,&v)); 5092 PetscCall(MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)pc))); 5093 if (error > PETSC_SMALL) { 5094 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5095 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 5096 } else { 5097 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 5098 } 5099 } 5100 } 5101 PetscCall(VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5102 PetscCall(VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5103 PetscCall(VecAXPY(x,-1.0,x_change)); 5104 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 5105 if (error > PETSC_SMALL) { 5106 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5107 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 5108 } else { 5109 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 5110 } 5111 } 5112 PetscCall(VecDestroy(&x)); 5113 PetscCall(VecDestroy(&x_change)); 5114 } 5115 5116 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5117 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA)); 5118 5119 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5120 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij)); 5121 if (isseqaij) { 5122 PetscCall(MatDestroy(&pcbddc->local_mat)); 5123 PetscCall(MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5124 if (lA) { 5125 Mat work; 5126 PetscCall(MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5127 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5128 PetscCall(MatDestroy(&work)); 5129 } 5130 } else { 5131 Mat work_mat; 5132 5133 PetscCall(MatDestroy(&pcbddc->local_mat)); 5134 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5135 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5136 PetscCall(MatDestroy(&work_mat)); 5137 if (lA) { 5138 Mat work; 5139 PetscCall(MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5140 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5141 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5142 PetscCall(MatDestroy(&work)); 5143 } 5144 } 5145 if (matis->A->symmetric_set) { 5146 PetscCall(MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric)); 5147 #if !defined(PETSC_USE_COMPLEX) 5148 PetscCall(MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric)); 5149 #endif 5150 } 5151 PetscCall(MatDestroy(&new_mat)); 5152 PetscFunctionReturn(0); 5153 } 5154 5155 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5156 { 5157 PC_IS* pcis = (PC_IS*)(pc->data); 5158 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5159 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5160 PetscInt *idx_R_local=NULL; 5161 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5162 PetscInt vbs,bs; 5163 PetscBT bitmask=NULL; 5164 5165 PetscFunctionBegin; 5166 /* 5167 No need to setup local scatters if 5168 - primal space is unchanged 5169 AND 5170 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5171 AND 5172 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5173 */ 5174 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5175 PetscFunctionReturn(0); 5176 } 5177 /* destroy old objects */ 5178 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5179 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5180 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5181 /* Set Non-overlapping dimensions */ 5182 n_B = pcis->n_B; 5183 n_D = pcis->n - n_B; 5184 n_vertices = pcbddc->n_vertices; 5185 5186 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5187 5188 /* create auxiliary bitmask and allocate workspace */ 5189 if (!sub_schurs || !sub_schurs->reuse_solver) { 5190 PetscCall(PetscMalloc1(pcis->n-n_vertices,&idx_R_local)); 5191 PetscCall(PetscBTCreate(pcis->n,&bitmask)); 5192 for (i=0;i<n_vertices;i++) { 5193 PetscCall(PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i])); 5194 } 5195 5196 for (i=0, n_R=0; i<pcis->n; i++) { 5197 if (!PetscBTLookup(bitmask,i)) { 5198 idx_R_local[n_R++] = i; 5199 } 5200 } 5201 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5202 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5203 5204 PetscCall(ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5205 PetscCall(ISGetLocalSize(reuse_solver->is_R,&n_R)); 5206 } 5207 5208 /* Block code */ 5209 vbs = 1; 5210 PetscCall(MatGetBlockSize(pcbddc->local_mat,&bs)); 5211 if (bs>1 && !(n_vertices%bs)) { 5212 PetscBool is_blocked = PETSC_TRUE; 5213 PetscInt *vary; 5214 if (!sub_schurs || !sub_schurs->reuse_solver) { 5215 PetscCall(PetscMalloc1(pcis->n/bs,&vary)); 5216 PetscCall(PetscArrayzero(vary,pcis->n/bs)); 5217 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5218 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5219 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5220 for (i=0; i<pcis->n/bs; i++) { 5221 if (vary[i]!=0 && vary[i]!=bs) { 5222 is_blocked = PETSC_FALSE; 5223 break; 5224 } 5225 } 5226 PetscCall(PetscFree(vary)); 5227 } else { 5228 /* Verify directly the R set */ 5229 for (i=0; i<n_R/bs; i++) { 5230 PetscInt j,node=idx_R_local[bs*i]; 5231 for (j=1; j<bs; j++) { 5232 if (node != idx_R_local[bs*i+j]-j) { 5233 is_blocked = PETSC_FALSE; 5234 break; 5235 } 5236 } 5237 } 5238 } 5239 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5240 vbs = bs; 5241 for (i=0;i<n_R/vbs;i++) { 5242 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5243 } 5244 } 5245 } 5246 PetscCall(ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local)); 5247 if (sub_schurs && sub_schurs->reuse_solver) { 5248 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5249 5250 PetscCall(ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5251 PetscCall(ISDestroy(&reuse_solver->is_R)); 5252 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5253 reuse_solver->is_R = pcbddc->is_R_local; 5254 } else { 5255 PetscCall(PetscFree(idx_R_local)); 5256 } 5257 5258 /* print some info if requested */ 5259 if (pcbddc->dbg_flag) { 5260 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5261 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5262 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5263 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank)); 5264 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B)); 5265 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size)); 5266 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5267 } 5268 5269 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5270 if (!sub_schurs || !sub_schurs->reuse_solver) { 5271 IS is_aux1,is_aux2; 5272 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5273 5274 PetscCall(ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5275 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array1)); 5276 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array2)); 5277 PetscCall(ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5278 for (i=0; i<n_D; i++) { 5279 PetscCall(PetscBTSet(bitmask,is_indices[i])); 5280 } 5281 PetscCall(ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5282 for (i=0, j=0; i<n_R; i++) { 5283 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5284 aux_array1[j++] = i; 5285 } 5286 } 5287 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5288 PetscCall(ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5289 for (i=0, j=0; i<n_B; i++) { 5290 if (!PetscBTLookup(bitmask,is_indices[i])) { 5291 aux_array2[j++] = i; 5292 } 5293 } 5294 PetscCall(ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5295 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2)); 5296 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B)); 5297 PetscCall(ISDestroy(&is_aux1)); 5298 PetscCall(ISDestroy(&is_aux2)); 5299 5300 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5301 PetscCall(PetscMalloc1(n_D,&aux_array1)); 5302 for (i=0, j=0; i<n_R; i++) { 5303 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5304 aux_array1[j++] = i; 5305 } 5306 } 5307 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5308 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5309 PetscCall(ISDestroy(&is_aux1)); 5310 } 5311 PetscCall(PetscBTDestroy(&bitmask)); 5312 PetscCall(ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5313 } else { 5314 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5315 IS tis; 5316 PetscInt schur_size; 5317 5318 PetscCall(ISGetLocalSize(reuse_solver->is_B,&schur_size)); 5319 PetscCall(ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis)); 5320 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B)); 5321 PetscCall(ISDestroy(&tis)); 5322 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5323 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis)); 5324 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5325 PetscCall(ISDestroy(&tis)); 5326 } 5327 } 5328 PetscFunctionReturn(0); 5329 } 5330 5331 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5332 { 5333 MatNullSpace NullSpace; 5334 Mat dmat; 5335 const Vec *nullvecs; 5336 Vec v,v2,*nullvecs2; 5337 VecScatter sct = NULL; 5338 PetscContainer c; 5339 PetscScalar *ddata; 5340 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5341 PetscBool nnsp_has_cnst; 5342 5343 PetscFunctionBegin; 5344 if (!is && !B) { /* MATIS */ 5345 Mat_IS* matis = (Mat_IS*)A->data; 5346 5347 if (!B) { 5348 PetscCall(MatISGetLocalMat(A,&B)); 5349 } 5350 sct = matis->cctx; 5351 PetscCall(PetscObjectReference((PetscObject)sct)); 5352 } else { 5353 PetscCall(MatGetNullSpace(B,&NullSpace)); 5354 if (!NullSpace) { 5355 PetscCall(MatGetNearNullSpace(B,&NullSpace)); 5356 } 5357 if (NullSpace) PetscFunctionReturn(0); 5358 } 5359 PetscCall(MatGetNullSpace(A,&NullSpace)); 5360 if (!NullSpace) { 5361 PetscCall(MatGetNearNullSpace(A,&NullSpace)); 5362 } 5363 if (!NullSpace) PetscFunctionReturn(0); 5364 5365 PetscCall(MatCreateVecs(A,&v,NULL)); 5366 PetscCall(MatCreateVecs(B,&v2,NULL)); 5367 if (!sct) { 5368 PetscCall(VecScatterCreate(v,is,v2,NULL,&sct)); 5369 } 5370 PetscCall(MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs)); 5371 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5372 PetscCall(PetscMalloc1(bsiz,&nullvecs2)); 5373 PetscCall(VecGetBlockSize(v2,&bs)); 5374 PetscCall(VecGetSize(v2,&N)); 5375 PetscCall(VecGetLocalSize(v2,&n)); 5376 PetscCall(PetscMalloc1(n*bsiz,&ddata)); 5377 for (k=0;k<nnsp_size;k++) { 5378 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k])); 5379 PetscCall(VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5380 PetscCall(VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5381 } 5382 if (nnsp_has_cnst) { 5383 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size])); 5384 PetscCall(VecSet(nullvecs2[nnsp_size],1.0)); 5385 } 5386 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2)); 5387 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace)); 5388 5389 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat)); 5390 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B),&c)); 5391 PetscCall(PetscContainerSetPointer(c,ddata)); 5392 PetscCall(PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault)); 5393 PetscCall(PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c)); 5394 PetscCall(PetscContainerDestroy(&c)); 5395 PetscCall(PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat)); 5396 PetscCall(MatDestroy(&dmat)); 5397 5398 for (k=0;k<bsiz;k++) { 5399 PetscCall(VecDestroy(&nullvecs2[k])); 5400 } 5401 PetscCall(PetscFree(nullvecs2)); 5402 PetscCall(MatSetNearNullSpace(B,NullSpace)); 5403 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5404 PetscCall(VecDestroy(&v)); 5405 PetscCall(VecDestroy(&v2)); 5406 PetscCall(VecScatterDestroy(&sct)); 5407 PetscFunctionReturn(0); 5408 } 5409 5410 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5411 { 5412 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5413 PC_IS *pcis = (PC_IS*)pc->data; 5414 PC pc_temp; 5415 Mat A_RR; 5416 MatNullSpace nnsp; 5417 MatReuse reuse; 5418 PetscScalar m_one = -1.0; 5419 PetscReal value; 5420 PetscInt n_D,n_R; 5421 PetscBool issbaij,opts; 5422 void (*f)(void) = NULL; 5423 char dir_prefix[256],neu_prefix[256],str_level[16]; 5424 size_t len; 5425 5426 PetscFunctionBegin; 5427 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5428 /* approximate solver, propagate NearNullSpace if needed */ 5429 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5430 MatNullSpace gnnsp1,gnnsp2; 5431 PetscBool lhas,ghas; 5432 5433 PetscCall(MatGetNearNullSpace(pcbddc->local_mat,&nnsp)); 5434 PetscCall(MatGetNearNullSpace(pc->pmat,&gnnsp1)); 5435 PetscCall(MatGetNullSpace(pc->pmat,&gnnsp2)); 5436 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5437 PetscCall(MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 5438 if (!ghas && (gnnsp1 || gnnsp2)) { 5439 PetscCall(MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL)); 5440 } 5441 } 5442 5443 /* compute prefixes */ 5444 PetscCall(PetscStrcpy(dir_prefix,"")); 5445 PetscCall(PetscStrcpy(neu_prefix,"")); 5446 if (!pcbddc->current_level) { 5447 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix))); 5448 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix))); 5449 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5450 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5451 } else { 5452 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 5453 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 5454 len -= 15; /* remove "pc_bddc_coarse_" */ 5455 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5456 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5457 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5458 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1)); 5459 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1)); 5460 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5461 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5462 PetscCall(PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix))); 5463 PetscCall(PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix))); 5464 } 5465 5466 /* DIRICHLET PROBLEM */ 5467 if (dirichlet) { 5468 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5469 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5470 PetscCheck(sub_schurs && sub_schurs->reuse_solver,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5471 if (pcbddc->dbg_flag) { 5472 Mat A_IIn; 5473 5474 PetscCall(PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn)); 5475 PetscCall(MatDestroy(&pcis->A_II)); 5476 pcis->A_II = A_IIn; 5477 } 5478 } 5479 if (pcbddc->local_mat->symmetric_set) { 5480 PetscCall(MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric)); 5481 } 5482 /* Matrix for Dirichlet problem is pcis->A_II */ 5483 n_D = pcis->n - pcis->n_B; 5484 opts = PETSC_FALSE; 5485 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5486 opts = PETSC_TRUE; 5487 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D)); 5488 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1)); 5489 /* default */ 5490 PetscCall(KSPSetType(pcbddc->ksp_D,KSPPREONLY)); 5491 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix)); 5492 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij)); 5493 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5494 if (issbaij) { 5495 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5496 } else { 5497 PetscCall(PCSetType(pc_temp,PCLU)); 5498 } 5499 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure)); 5500 } 5501 PetscCall(MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix)); 5502 PetscCall(KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II)); 5503 /* Allow user's customization */ 5504 if (opts) { 5505 PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5506 } 5507 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5508 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5509 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II)); 5510 } 5511 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5512 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5513 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5514 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5515 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5516 const PetscInt *idxs; 5517 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5518 5519 PetscCall(ISGetLocalSize(pcis->is_I_local,&nl)); 5520 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 5521 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5522 for (i=0;i<nl;i++) { 5523 for (d=0;d<cdim;d++) { 5524 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5525 } 5526 } 5527 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 5528 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5529 PetscCall(PetscFree(scoords)); 5530 } 5531 if (sub_schurs && sub_schurs->reuse_solver) { 5532 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5533 5534 PetscCall(KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver)); 5535 } 5536 5537 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5538 if (!n_D) { 5539 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5540 PetscCall(PCSetType(pc_temp,PCNONE)); 5541 } 5542 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5543 /* set ksp_D into pcis data */ 5544 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5545 PetscCall(KSPDestroy(&pcis->ksp_D)); 5546 pcis->ksp_D = pcbddc->ksp_D; 5547 } 5548 5549 /* NEUMANN PROBLEM */ 5550 A_RR = NULL; 5551 if (neumann) { 5552 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5553 PetscInt ibs,mbs; 5554 PetscBool issbaij, reuse_neumann_solver; 5555 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5556 5557 reuse_neumann_solver = PETSC_FALSE; 5558 if (sub_schurs && sub_schurs->reuse_solver) { 5559 IS iP; 5560 5561 reuse_neumann_solver = PETSC_TRUE; 5562 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP)); 5563 if (iP) reuse_neumann_solver = PETSC_FALSE; 5564 } 5565 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5566 PetscCall(ISGetSize(pcbddc->is_R_local,&n_R)); 5567 if (pcbddc->ksp_R) { /* already created ksp */ 5568 PetscInt nn_R; 5569 PetscCall(KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR)); 5570 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5571 PetscCall(MatGetSize(A_RR,&nn_R,NULL)); 5572 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5573 PetscCall(KSPReset(pcbddc->ksp_R)); 5574 PetscCall(MatDestroy(&A_RR)); 5575 reuse = MAT_INITIAL_MATRIX; 5576 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5577 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5578 PetscCall(MatDestroy(&A_RR)); 5579 reuse = MAT_INITIAL_MATRIX; 5580 } else { /* safe to reuse the matrix */ 5581 reuse = MAT_REUSE_MATRIX; 5582 } 5583 } 5584 /* last check */ 5585 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5586 PetscCall(MatDestroy(&A_RR)); 5587 reuse = MAT_INITIAL_MATRIX; 5588 } 5589 } else { /* first time, so we need to create the matrix */ 5590 reuse = MAT_INITIAL_MATRIX; 5591 } 5592 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5593 TODO: Get Rid of these conversions */ 5594 PetscCall(MatGetBlockSize(pcbddc->local_mat,&mbs)); 5595 PetscCall(ISGetBlockSize(pcbddc->is_R_local,&ibs)); 5596 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij)); 5597 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5598 if (matis->A == pcbddc->local_mat) { 5599 PetscCall(MatDestroy(&pcbddc->local_mat)); 5600 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5601 } else { 5602 PetscCall(MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5603 } 5604 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5605 if (matis->A == pcbddc->local_mat) { 5606 PetscCall(MatDestroy(&pcbddc->local_mat)); 5607 PetscCall(MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5608 } else { 5609 PetscCall(MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5610 } 5611 } 5612 /* extract A_RR */ 5613 if (reuse_neumann_solver) { 5614 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5615 5616 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5617 PetscCall(MatDestroy(&A_RR)); 5618 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5619 PetscCall(PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR)); 5620 } else { 5621 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR)); 5622 } 5623 } else { 5624 PetscCall(MatDestroy(&A_RR)); 5625 PetscCall(PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL)); 5626 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5627 } 5628 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5629 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR)); 5630 } 5631 if (pcbddc->local_mat->symmetric_set) { 5632 PetscCall(MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric)); 5633 } 5634 opts = PETSC_FALSE; 5635 if (!pcbddc->ksp_R) { /* create object if not present */ 5636 opts = PETSC_TRUE; 5637 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R)); 5638 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1)); 5639 /* default */ 5640 PetscCall(KSPSetType(pcbddc->ksp_R,KSPPREONLY)); 5641 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix)); 5642 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5643 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij)); 5644 if (issbaij) { 5645 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5646 } else { 5647 PetscCall(PCSetType(pc_temp,PCLU)); 5648 } 5649 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure)); 5650 } 5651 PetscCall(KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR)); 5652 PetscCall(MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix)); 5653 if (opts) { /* Allow user's customization once */ 5654 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5655 } 5656 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5657 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5658 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR)); 5659 } 5660 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5661 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5662 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5663 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5664 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5665 const PetscInt *idxs; 5666 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5667 5668 PetscCall(ISGetLocalSize(pcbddc->is_R_local,&nl)); 5669 PetscCall(ISGetIndices(pcbddc->is_R_local,&idxs)); 5670 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5671 for (i=0;i<nl;i++) { 5672 for (d=0;d<cdim;d++) { 5673 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5674 } 5675 } 5676 PetscCall(ISRestoreIndices(pcbddc->is_R_local,&idxs)); 5677 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5678 PetscCall(PetscFree(scoords)); 5679 } 5680 5681 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5682 if (!n_R) { 5683 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5684 PetscCall(PCSetType(pc_temp,PCNONE)); 5685 } 5686 /* Reuse solver if it is present */ 5687 if (reuse_neumann_solver) { 5688 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5689 5690 PetscCall(KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver)); 5691 } 5692 PetscCall(KSPSetUp(pcbddc->ksp_R)); 5693 } 5694 5695 if (pcbddc->dbg_flag) { 5696 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5697 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5698 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5699 } 5700 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5701 5702 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5703 if (pcbddc->NullSpace_corr[0]) { 5704 PetscCall(PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE)); 5705 } 5706 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5707 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1])); 5708 } 5709 if (neumann && pcbddc->NullSpace_corr[2]) { 5710 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3])); 5711 } 5712 /* check Dirichlet and Neumann solvers */ 5713 if (pcbddc->dbg_flag) { 5714 if (dirichlet) { /* Dirichlet */ 5715 PetscCall(VecSetRandom(pcis->vec1_D,NULL)); 5716 PetscCall(MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D)); 5717 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D)); 5718 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 5719 PetscCall(VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D)); 5720 PetscCall(VecNorm(pcis->vec1_D,NORM_INFINITY,&value)); 5721 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value)); 5722 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5723 } 5724 if (neumann) { /* Neumann */ 5725 PetscCall(VecSetRandom(pcbddc->vec1_R,NULL)); 5726 PetscCall(MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R)); 5727 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R)); 5728 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 5729 PetscCall(VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R)); 5730 PetscCall(VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value)); 5731 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value)); 5732 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5733 } 5734 } 5735 /* free Neumann problem's matrix */ 5736 PetscCall(MatDestroy(&A_RR)); 5737 PetscFunctionReturn(0); 5738 } 5739 5740 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5741 { 5742 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5743 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5744 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5745 5746 PetscFunctionBegin; 5747 if (!reuse_solver) { 5748 PetscCall(VecSet(pcbddc->vec1_R,0.)); 5749 } 5750 if (!pcbddc->switch_static) { 5751 if (applytranspose && pcbddc->local_auxmat1) { 5752 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C)); 5753 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5754 } 5755 if (!reuse_solver) { 5756 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5757 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5758 } else { 5759 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5760 5761 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5762 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5763 } 5764 } else { 5765 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5766 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5767 PetscCall(VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5768 PetscCall(VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5769 if (applytranspose && pcbddc->local_auxmat1) { 5770 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C)); 5771 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5772 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5773 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5774 } 5775 } 5776 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5777 if (!reuse_solver || pcbddc->switch_static) { 5778 if (applytranspose) { 5779 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5780 } else { 5781 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5782 } 5783 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R)); 5784 } else { 5785 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5786 5787 if (applytranspose) { 5788 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5789 } else { 5790 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5791 } 5792 } 5793 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5794 PetscCall(VecSet(inout_B,0.)); 5795 if (!pcbddc->switch_static) { 5796 if (!reuse_solver) { 5797 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5798 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5799 } else { 5800 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5801 5802 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5803 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5804 } 5805 if (!applytranspose && pcbddc->local_auxmat1) { 5806 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5807 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B)); 5808 } 5809 } else { 5810 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5811 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5812 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5813 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5814 if (!applytranspose && pcbddc->local_auxmat1) { 5815 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5816 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R)); 5817 } 5818 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5819 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5820 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5821 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5822 } 5823 PetscFunctionReturn(0); 5824 } 5825 5826 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5827 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5828 { 5829 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5830 PC_IS* pcis = (PC_IS*) (pc->data); 5831 const PetscScalar zero = 0.0; 5832 5833 PetscFunctionBegin; 5834 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5835 if (!pcbddc->benign_apply_coarse_only) { 5836 if (applytranspose) { 5837 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P)); 5838 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5839 } else { 5840 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P)); 5841 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5842 } 5843 } else { 5844 PetscCall(VecSet(pcbddc->vec1_P,zero)); 5845 } 5846 5847 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5848 if (pcbddc->benign_n) { 5849 PetscScalar *array; 5850 PetscInt j; 5851 5852 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5853 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5854 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5855 } 5856 5857 /* start communications from local primal nodes to rhs of coarse solver */ 5858 PetscCall(VecSet(pcbddc->coarse_vec,zero)); 5859 PetscCall(PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD)); 5860 PetscCall(PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD)); 5861 5862 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5863 if (pcbddc->coarse_ksp) { 5864 Mat coarse_mat; 5865 Vec rhs,sol; 5866 MatNullSpace nullsp; 5867 PetscBool isbddc = PETSC_FALSE; 5868 5869 if (pcbddc->benign_have_null) { 5870 PC coarse_pc; 5871 5872 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5873 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 5874 /* we need to propagate to coarser levels the need for a possible benign correction */ 5875 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5876 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5877 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5878 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5879 } 5880 } 5881 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&rhs)); 5882 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&sol)); 5883 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 5884 if (applytranspose) { 5885 PetscCheck(!pcbddc->benign_apply_coarse_only,PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5886 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5887 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol)); 5888 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5889 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5890 PetscCall(MatGetTransposeNullSpace(coarse_mat,&nullsp)); 5891 if (nullsp) { 5892 PetscCall(MatNullSpaceRemove(nullsp,sol)); 5893 } 5894 } else { 5895 PetscCall(MatGetNullSpace(coarse_mat,&nullsp)); 5896 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5897 PC coarse_pc; 5898 5899 if (nullsp) { 5900 PetscCall(MatNullSpaceRemove(nullsp,rhs)); 5901 } 5902 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5903 PetscCall(PCPreSolve(coarse_pc,pcbddc->coarse_ksp)); 5904 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol)); 5905 PetscCall(PCPostSolve(coarse_pc,pcbddc->coarse_ksp)); 5906 } else { 5907 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5908 PetscCall(KSPSolve(pcbddc->coarse_ksp,rhs,sol)); 5909 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5910 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5911 if (nullsp) { 5912 PetscCall(MatNullSpaceRemove(nullsp,sol)); 5913 } 5914 } 5915 } 5916 /* we don't need the benign correction at coarser levels anymore */ 5917 if (pcbddc->benign_have_null && isbddc) { 5918 PC coarse_pc; 5919 PC_BDDC* coarsepcbddc; 5920 5921 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5922 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5923 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5924 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5925 } 5926 } 5927 5928 /* Local solution on R nodes */ 5929 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5930 PetscCall(PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose)); 5931 } 5932 /* communications from coarse sol to local primal nodes */ 5933 PetscCall(PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE)); 5934 PetscCall(PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE)); 5935 5936 /* Sum contributions from the two levels */ 5937 if (!pcbddc->benign_apply_coarse_only) { 5938 if (applytranspose) { 5939 PetscCall(MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5940 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5941 } else { 5942 PetscCall(MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5943 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5944 } 5945 /* store p0 */ 5946 if (pcbddc->benign_n) { 5947 PetscScalar *array; 5948 PetscInt j; 5949 5950 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5951 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5952 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5953 } 5954 } else { /* expand the coarse solution */ 5955 if (applytranspose) { 5956 PetscCall(MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B)); 5957 } else { 5958 PetscCall(MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B)); 5959 } 5960 } 5961 PetscFunctionReturn(0); 5962 } 5963 5964 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5965 { 5966 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5967 Vec from,to; 5968 const PetscScalar *array; 5969 5970 PetscFunctionBegin; 5971 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5972 from = pcbddc->coarse_vec; 5973 to = pcbddc->vec1_P; 5974 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5975 Vec tvec; 5976 5977 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 5978 PetscCall(VecResetArray(tvec)); 5979 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&tvec)); 5980 PetscCall(VecGetArrayRead(tvec,&array)); 5981 PetscCall(VecPlaceArray(from,array)); 5982 PetscCall(VecRestoreArrayRead(tvec,&array)); 5983 } 5984 } else { /* from local to global -> put data in coarse right hand side */ 5985 from = pcbddc->vec1_P; 5986 to = pcbddc->coarse_vec; 5987 } 5988 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 5989 PetscFunctionReturn(0); 5990 } 5991 5992 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5993 { 5994 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5995 Vec from,to; 5996 const PetscScalar *array; 5997 5998 PetscFunctionBegin; 5999 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6000 from = pcbddc->coarse_vec; 6001 to = pcbddc->vec1_P; 6002 } else { /* from local to global -> put data in coarse right hand side */ 6003 from = pcbddc->vec1_P; 6004 to = pcbddc->coarse_vec; 6005 } 6006 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 6007 if (smode == SCATTER_FORWARD) { 6008 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6009 Vec tvec; 6010 6011 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 6012 PetscCall(VecGetArrayRead(to,&array)); 6013 PetscCall(VecPlaceArray(tvec,array)); 6014 PetscCall(VecRestoreArrayRead(to,&array)); 6015 } 6016 } else { 6017 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6018 PetscCall(VecResetArray(from)); 6019 } 6020 } 6021 PetscFunctionReturn(0); 6022 } 6023 6024 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6025 { 6026 PetscErrorCode ierr; 6027 PC_IS* pcis = (PC_IS*)(pc->data); 6028 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6029 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6030 /* one and zero */ 6031 PetscScalar one=1.0,zero=0.0; 6032 /* space to store constraints and their local indices */ 6033 PetscScalar *constraints_data; 6034 PetscInt *constraints_idxs,*constraints_idxs_B; 6035 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6036 PetscInt *constraints_n; 6037 /* iterators */ 6038 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6039 /* BLAS integers */ 6040 PetscBLASInt lwork,lierr; 6041 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6042 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6043 /* reuse */ 6044 PetscInt olocal_primal_size,olocal_primal_size_cc; 6045 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6046 /* change of basis */ 6047 PetscBool qr_needed; 6048 PetscBT change_basis,qr_needed_idx; 6049 /* auxiliary stuff */ 6050 PetscInt *nnz,*is_indices; 6051 PetscInt ncc; 6052 /* some quantities */ 6053 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6054 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6055 PetscReal tol; /* tolerance for retaining eigenmodes */ 6056 6057 PetscFunctionBegin; 6058 tol = PetscSqrtReal(PETSC_SMALL); 6059 /* Destroy Mat objects computed previously */ 6060 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6061 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6062 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6063 /* save info on constraints from previous setup (if any) */ 6064 olocal_primal_size = pcbddc->local_primal_size; 6065 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6066 PetscCall(PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult)); 6067 PetscCall(PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc)); 6068 PetscCall(PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc)); 6069 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 6070 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6071 6072 if (!pcbddc->adaptive_selection) { 6073 IS ISForVertices,*ISForFaces,*ISForEdges; 6074 MatNullSpace nearnullsp; 6075 const Vec *nearnullvecs; 6076 Vec *localnearnullsp; 6077 PetscScalar *array; 6078 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 6079 PetscBool nnsp_has_cnst; 6080 /* LAPACK working arrays for SVD or POD */ 6081 PetscBool skip_lapack,boolforchange; 6082 PetscScalar *work; 6083 PetscReal *singular_vals; 6084 #if defined(PETSC_USE_COMPLEX) 6085 PetscReal *rwork; 6086 #endif 6087 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6088 PetscBLASInt dummy_int=1; 6089 PetscScalar dummy_scalar=1.; 6090 PetscBool use_pod = PETSC_FALSE; 6091 6092 /* MKL SVD with same input gives different results on different processes! */ 6093 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6094 use_pod = PETSC_TRUE; 6095 #endif 6096 /* Get index sets for faces, edges and vertices from graph */ 6097 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices)); 6098 /* print some info */ 6099 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6100 PetscInt nv; 6101 6102 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 6103 PetscCall(ISGetSize(ISForVertices,&nv)); 6104 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6105 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6106 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices)); 6107 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges)); 6108 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces)); 6109 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6110 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6111 } 6112 6113 /* free unneeded index sets */ 6114 if (!pcbddc->use_vertices) { 6115 PetscCall(ISDestroy(&ISForVertices)); 6116 } 6117 if (!pcbddc->use_edges) { 6118 for (i=0;i<n_ISForEdges;i++) { 6119 PetscCall(ISDestroy(&ISForEdges[i])); 6120 } 6121 PetscCall(PetscFree(ISForEdges)); 6122 n_ISForEdges = 0; 6123 } 6124 if (!pcbddc->use_faces) { 6125 for (i=0;i<n_ISForFaces;i++) { 6126 PetscCall(ISDestroy(&ISForFaces[i])); 6127 } 6128 PetscCall(PetscFree(ISForFaces)); 6129 n_ISForFaces = 0; 6130 } 6131 6132 /* check if near null space is attached to global mat */ 6133 if (pcbddc->use_nnsp) { 6134 PetscCall(MatGetNearNullSpace(pc->pmat,&nearnullsp)); 6135 } else nearnullsp = NULL; 6136 6137 if (nearnullsp) { 6138 PetscCall(MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs)); 6139 /* remove any stored info */ 6140 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6141 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6142 /* store information for BDDC solver reuse */ 6143 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6144 pcbddc->onearnullspace = nearnullsp; 6145 PetscCall(PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state)); 6146 for (i=0;i<nnsp_size;i++) { 6147 PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i])); 6148 } 6149 } else { /* if near null space is not provided BDDC uses constants by default */ 6150 nnsp_size = 0; 6151 nnsp_has_cnst = PETSC_TRUE; 6152 } 6153 /* get max number of constraints on a single cc */ 6154 max_constraints = nnsp_size; 6155 if (nnsp_has_cnst) max_constraints++; 6156 6157 /* 6158 Evaluate maximum storage size needed by the procedure 6159 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6160 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6161 There can be multiple constraints per connected component 6162 */ 6163 n_vertices = 0; 6164 if (ISForVertices) { 6165 PetscCall(ISGetSize(ISForVertices,&n_vertices)); 6166 } 6167 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6168 PetscCall(PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n)); 6169 6170 total_counts = n_ISForFaces+n_ISForEdges; 6171 total_counts *= max_constraints; 6172 total_counts += n_vertices; 6173 PetscCall(PetscBTCreate(total_counts,&change_basis)); 6174 6175 total_counts = 0; 6176 max_size_of_constraint = 0; 6177 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6178 IS used_is; 6179 if (i<n_ISForEdges) { 6180 used_is = ISForEdges[i]; 6181 } else { 6182 used_is = ISForFaces[i-n_ISForEdges]; 6183 } 6184 PetscCall(ISGetSize(used_is,&j)); 6185 total_counts += j; 6186 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6187 } 6188 PetscCall(PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B)); 6189 6190 /* get local part of global near null space vectors */ 6191 PetscCall(PetscMalloc1(nnsp_size,&localnearnullsp)); 6192 for (k=0;k<nnsp_size;k++) { 6193 PetscCall(VecDuplicate(pcis->vec1_N,&localnearnullsp[k])); 6194 PetscCall(VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6195 PetscCall(VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6196 } 6197 6198 /* whether or not to skip lapack calls */ 6199 skip_lapack = PETSC_TRUE; 6200 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6201 6202 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6203 if (!skip_lapack) { 6204 PetscScalar temp_work; 6205 6206 if (use_pod) { 6207 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6208 PetscCall(PetscMalloc1(max_constraints*max_constraints,&correlation_mat)); 6209 PetscCall(PetscMalloc1(max_constraints,&singular_vals)); 6210 PetscCall(PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis)); 6211 #if defined(PETSC_USE_COMPLEX) 6212 PetscCall(PetscMalloc1(3*max_constraints,&rwork)); 6213 #endif 6214 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6215 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6216 PetscCall(PetscBLASIntCast(max_constraints,&Blas_LDA)); 6217 lwork = -1; 6218 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6219 #if !defined(PETSC_USE_COMPLEX) 6220 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6221 #else 6222 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6223 #endif 6224 PetscCall(PetscFPTrapPop()); 6225 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6226 } else { 6227 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6228 /* SVD */ 6229 PetscInt max_n,min_n; 6230 max_n = max_size_of_constraint; 6231 min_n = max_constraints; 6232 if (max_size_of_constraint < max_constraints) { 6233 min_n = max_size_of_constraint; 6234 max_n = max_constraints; 6235 } 6236 PetscCall(PetscMalloc1(min_n,&singular_vals)); 6237 #if defined(PETSC_USE_COMPLEX) 6238 PetscCall(PetscMalloc1(5*min_n,&rwork)); 6239 #endif 6240 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6241 lwork = -1; 6242 PetscCall(PetscBLASIntCast(max_n,&Blas_M)); 6243 PetscCall(PetscBLASIntCast(min_n,&Blas_N)); 6244 PetscCall(PetscBLASIntCast(max_n,&Blas_LDA)); 6245 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6246 #if !defined(PETSC_USE_COMPLEX) 6247 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6248 #else 6249 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6250 #endif 6251 PetscCall(PetscFPTrapPop()); 6252 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6253 #else 6254 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6255 #endif /* on missing GESVD */ 6256 } 6257 /* Allocate optimal workspace */ 6258 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork)); 6259 PetscCall(PetscMalloc1(lwork,&work)); 6260 } 6261 /* Now we can loop on constraining sets */ 6262 total_counts = 0; 6263 constraints_idxs_ptr[0] = 0; 6264 constraints_data_ptr[0] = 0; 6265 /* vertices */ 6266 if (n_vertices) { 6267 PetscCall(ISGetIndices(ISForVertices,(const PetscInt**)&is_indices)); 6268 PetscCall(PetscArraycpy(constraints_idxs,is_indices,n_vertices)); 6269 for (i=0;i<n_vertices;i++) { 6270 constraints_n[total_counts] = 1; 6271 constraints_data[total_counts] = 1.0; 6272 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6273 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6274 total_counts++; 6275 } 6276 PetscCall(ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices)); 6277 n_vertices = total_counts; 6278 } 6279 6280 /* edges and faces */ 6281 total_counts_cc = total_counts; 6282 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6283 IS used_is; 6284 PetscBool idxs_copied = PETSC_FALSE; 6285 6286 if (ncc<n_ISForEdges) { 6287 used_is = ISForEdges[ncc]; 6288 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6289 } else { 6290 used_is = ISForFaces[ncc-n_ISForEdges]; 6291 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6292 } 6293 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6294 6295 PetscCall(ISGetSize(used_is,&size_of_constraint)); 6296 PetscCall(ISGetIndices(used_is,(const PetscInt**)&is_indices)); 6297 /* change of basis should not be performed on local periodic nodes */ 6298 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6299 if (nnsp_has_cnst) { 6300 PetscScalar quad_value; 6301 6302 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6303 idxs_copied = PETSC_TRUE; 6304 6305 if (!pcbddc->use_nnsp_true) { 6306 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6307 } else { 6308 quad_value = 1.0; 6309 } 6310 for (j=0;j<size_of_constraint;j++) { 6311 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6312 } 6313 temp_constraints++; 6314 total_counts++; 6315 } 6316 for (k=0;k<nnsp_size;k++) { 6317 PetscReal real_value; 6318 PetscScalar *ptr_to_data; 6319 6320 PetscCall(VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6321 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6322 for (j=0;j<size_of_constraint;j++) { 6323 ptr_to_data[j] = array[is_indices[j]]; 6324 } 6325 PetscCall(VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6326 /* check if array is null on the connected component */ 6327 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6328 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6329 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6330 temp_constraints++; 6331 total_counts++; 6332 if (!idxs_copied) { 6333 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6334 idxs_copied = PETSC_TRUE; 6335 } 6336 } 6337 } 6338 PetscCall(ISRestoreIndices(used_is,(const PetscInt**)&is_indices)); 6339 valid_constraints = temp_constraints; 6340 if (!pcbddc->use_nnsp_true && temp_constraints) { 6341 if (temp_constraints == 1) { /* just normalize the constraint */ 6342 PetscScalar norm,*ptr_to_data; 6343 6344 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6345 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6346 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6347 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6348 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6349 } else { /* perform SVD */ 6350 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6351 6352 if (use_pod) { 6353 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6354 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6355 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6356 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6357 from that computed using LAPACKgesvd 6358 -> This is due to a different computation of eigenvectors in LAPACKheev 6359 -> The quality of the POD-computed basis will be the same */ 6360 PetscCall(PetscArrayzero(correlation_mat,temp_constraints*temp_constraints)); 6361 /* Store upper triangular part of correlation matrix */ 6362 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6363 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6364 for (j=0;j<temp_constraints;j++) { 6365 for (k=0;k<j+1;k++) { 6366 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6367 } 6368 } 6369 /* compute eigenvalues and eigenvectors of correlation matrix */ 6370 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6371 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDA)); 6372 #if !defined(PETSC_USE_COMPLEX) 6373 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6374 #else 6375 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6376 #endif 6377 PetscCall(PetscFPTrapPop()); 6378 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6379 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6380 j = 0; 6381 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6382 total_counts = total_counts-j; 6383 valid_constraints = temp_constraints-j; 6384 /* scale and copy POD basis into used quadrature memory */ 6385 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6386 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6387 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_K)); 6388 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6389 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDB)); 6390 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6391 if (j<temp_constraints) { 6392 PetscInt ii; 6393 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6394 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6395 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6396 PetscCall(PetscFPTrapPop()); 6397 for (k=0;k<temp_constraints-j;k++) { 6398 for (ii=0;ii<size_of_constraint;ii++) { 6399 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6400 } 6401 } 6402 } 6403 } else { 6404 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6405 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6406 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6407 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6408 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6409 #if !defined(PETSC_USE_COMPLEX) 6410 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6411 #else 6412 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6413 #endif 6414 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6415 PetscCall(PetscFPTrapPop()); 6416 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6417 k = temp_constraints; 6418 if (k > size_of_constraint) k = size_of_constraint; 6419 j = 0; 6420 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6421 valid_constraints = k-j; 6422 total_counts = total_counts-temp_constraints+valid_constraints; 6423 #else 6424 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6425 #endif /* on missing GESVD */ 6426 } 6427 } 6428 } 6429 /* update pointers information */ 6430 if (valid_constraints) { 6431 constraints_n[total_counts_cc] = valid_constraints; 6432 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6433 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6434 /* set change_of_basis flag */ 6435 if (boolforchange) { 6436 PetscBTSet(change_basis,total_counts_cc); 6437 } 6438 total_counts_cc++; 6439 } 6440 } 6441 /* free workspace */ 6442 if (!skip_lapack) { 6443 PetscCall(PetscFree(work)); 6444 #if defined(PETSC_USE_COMPLEX) 6445 PetscCall(PetscFree(rwork)); 6446 #endif 6447 PetscCall(PetscFree(singular_vals)); 6448 PetscCall(PetscFree(correlation_mat)); 6449 PetscCall(PetscFree(temp_basis)); 6450 } 6451 for (k=0;k<nnsp_size;k++) { 6452 PetscCall(VecDestroy(&localnearnullsp[k])); 6453 } 6454 PetscCall(PetscFree(localnearnullsp)); 6455 /* free index sets of faces, edges and vertices */ 6456 for (i=0;i<n_ISForFaces;i++) { 6457 PetscCall(ISDestroy(&ISForFaces[i])); 6458 } 6459 if (n_ISForFaces) { 6460 PetscCall(PetscFree(ISForFaces)); 6461 } 6462 for (i=0;i<n_ISForEdges;i++) { 6463 PetscCall(ISDestroy(&ISForEdges[i])); 6464 } 6465 if (n_ISForEdges) { 6466 PetscCall(PetscFree(ISForEdges)); 6467 } 6468 PetscCall(ISDestroy(&ISForVertices)); 6469 } else { 6470 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6471 6472 total_counts = 0; 6473 n_vertices = 0; 6474 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6475 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 6476 } 6477 max_constraints = 0; 6478 total_counts_cc = 0; 6479 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6480 total_counts += pcbddc->adaptive_constraints_n[i]; 6481 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6482 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6483 } 6484 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6485 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6486 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6487 constraints_data = pcbddc->adaptive_constraints_data; 6488 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6489 PetscCall(PetscMalloc1(total_counts_cc,&constraints_n)); 6490 total_counts_cc = 0; 6491 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6492 if (pcbddc->adaptive_constraints_n[i]) { 6493 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6494 } 6495 } 6496 6497 max_size_of_constraint = 0; 6498 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6499 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B)); 6500 /* Change of basis */ 6501 PetscCall(PetscBTCreate(total_counts_cc,&change_basis)); 6502 if (pcbddc->use_change_of_basis) { 6503 for (i=0;i<sub_schurs->n_subs;i++) { 6504 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6505 PetscCall(PetscBTSet(change_basis,i+n_vertices)); 6506 } 6507 } 6508 } 6509 } 6510 pcbddc->local_primal_size = total_counts; 6511 PetscCall(PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs)); 6512 6513 /* map constraints_idxs in boundary numbering */ 6514 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B)); 6515 PetscCheckFalse(i != constraints_idxs_ptr[total_counts_cc],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6516 6517 /* Create constraint matrix */ 6518 PetscCall(MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix)); 6519 PetscCall(MatSetType(pcbddc->ConstraintMatrix,MATAIJ)); 6520 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n)); 6521 6522 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6523 /* determine if a QR strategy is needed for change of basis */ 6524 qr_needed = pcbddc->use_qr_single; 6525 PetscCall(PetscBTCreate(total_counts_cc,&qr_needed_idx)); 6526 total_primal_vertices=0; 6527 pcbddc->local_primal_size_cc = 0; 6528 for (i=0;i<total_counts_cc;i++) { 6529 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6530 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6531 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6532 pcbddc->local_primal_size_cc += 1; 6533 } else if (PetscBTLookup(change_basis,i)) { 6534 for (k=0;k<constraints_n[i];k++) { 6535 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6536 } 6537 pcbddc->local_primal_size_cc += constraints_n[i]; 6538 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6539 PetscBTSet(qr_needed_idx,i); 6540 qr_needed = PETSC_TRUE; 6541 } 6542 } else { 6543 pcbddc->local_primal_size_cc += 1; 6544 } 6545 } 6546 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6547 pcbddc->n_vertices = total_primal_vertices; 6548 /* permute indices in order to have a sorted set of vertices */ 6549 PetscCall(PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs)); 6550 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult)); 6551 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices)); 6552 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6553 6554 /* nonzero structure of constraint matrix */ 6555 /* and get reference dof for local constraints */ 6556 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&nnz)); 6557 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6558 6559 j = total_primal_vertices; 6560 total_counts = total_primal_vertices; 6561 cum = total_primal_vertices; 6562 for (i=n_vertices;i<total_counts_cc;i++) { 6563 if (!PetscBTLookup(change_basis,i)) { 6564 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6565 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6566 cum++; 6567 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6568 for (k=0;k<constraints_n[i];k++) { 6569 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6570 nnz[j+k] = size_of_constraint; 6571 } 6572 j += constraints_n[i]; 6573 } 6574 } 6575 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz)); 6576 PetscCall(MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6577 PetscCall(PetscFree(nnz)); 6578 6579 /* set values in constraint matrix */ 6580 for (i=0;i<total_primal_vertices;i++) { 6581 PetscCall(MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES)); 6582 } 6583 total_counts = total_primal_vertices; 6584 for (i=n_vertices;i<total_counts_cc;i++) { 6585 if (!PetscBTLookup(change_basis,i)) { 6586 PetscInt *cols; 6587 6588 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6589 cols = constraints_idxs+constraints_idxs_ptr[i]; 6590 for (k=0;k<constraints_n[i];k++) { 6591 PetscInt row = total_counts+k; 6592 PetscScalar *vals; 6593 6594 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6595 PetscCall(MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES)); 6596 } 6597 total_counts += constraints_n[i]; 6598 } 6599 } 6600 /* assembling */ 6601 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6602 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6603 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view")); 6604 6605 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6606 if (pcbddc->use_change_of_basis) { 6607 /* dual and primal dofs on a single cc */ 6608 PetscInt dual_dofs,primal_dofs; 6609 /* working stuff for GEQRF */ 6610 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6611 PetscBLASInt lqr_work; 6612 /* working stuff for UNGQR */ 6613 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6614 PetscBLASInt lgqr_work; 6615 /* working stuff for TRTRS */ 6616 PetscScalar *trs_rhs = NULL; 6617 PetscBLASInt Blas_NRHS; 6618 /* pointers for values insertion into change of basis matrix */ 6619 PetscInt *start_rows,*start_cols; 6620 PetscScalar *start_vals; 6621 /* working stuff for values insertion */ 6622 PetscBT is_primal; 6623 PetscInt *aux_primal_numbering_B; 6624 /* matrix sizes */ 6625 PetscInt global_size,local_size; 6626 /* temporary change of basis */ 6627 Mat localChangeOfBasisMatrix; 6628 /* extra space for debugging */ 6629 PetscScalar *dbg_work = NULL; 6630 6631 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6632 PetscCall(MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix)); 6633 PetscCall(MatSetType(localChangeOfBasisMatrix,MATAIJ)); 6634 PetscCall(MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n)); 6635 /* nonzeros for local mat */ 6636 PetscCall(PetscMalloc1(pcis->n,&nnz)); 6637 if (!pcbddc->benign_change || pcbddc->fake_change) { 6638 for (i=0;i<pcis->n;i++) nnz[i]=1; 6639 } else { 6640 const PetscInt *ii; 6641 PetscInt n; 6642 PetscBool flg_row; 6643 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6644 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6645 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6646 } 6647 for (i=n_vertices;i<total_counts_cc;i++) { 6648 if (PetscBTLookup(change_basis,i)) { 6649 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6650 if (PetscBTLookup(qr_needed_idx,i)) { 6651 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6652 } else { 6653 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6654 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6655 } 6656 } 6657 } 6658 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz)); 6659 PetscCall(MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6660 PetscCall(PetscFree(nnz)); 6661 /* Set interior change in the matrix */ 6662 if (!pcbddc->benign_change || pcbddc->fake_change) { 6663 for (i=0;i<pcis->n;i++) { 6664 PetscCall(MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES)); 6665 } 6666 } else { 6667 const PetscInt *ii,*jj; 6668 PetscScalar *aa; 6669 PetscInt n; 6670 PetscBool flg_row; 6671 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6672 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change,&aa)); 6673 for (i=0;i<n;i++) { 6674 PetscCall(MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES)); 6675 } 6676 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change,&aa)); 6677 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6678 } 6679 6680 if (pcbddc->dbg_flag) { 6681 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6682 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank)); 6683 } 6684 6685 /* Now we loop on the constraints which need a change of basis */ 6686 /* 6687 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6688 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6689 6690 Basic blocks of change of basis matrix T computed by 6691 6692 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6693 6694 | 1 0 ... 0 s_1/S | 6695 | 0 1 ... 0 s_2/S | 6696 | ... | 6697 | 0 ... 1 s_{n-1}/S | 6698 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6699 6700 with S = \sum_{i=1}^n s_i^2 6701 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6702 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6703 6704 - QR decomposition of constraints otherwise 6705 */ 6706 if (qr_needed && max_size_of_constraint) { 6707 /* space to store Q */ 6708 PetscCall(PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis)); 6709 /* array to store scaling factors for reflectors */ 6710 PetscCall(PetscMalloc1(max_constraints,&qr_tau)); 6711 /* first we issue queries for optimal work */ 6712 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6713 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6714 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6715 lqr_work = -1; 6716 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6717 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6718 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work)); 6719 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work)); 6720 lgqr_work = -1; 6721 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6722 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_N)); 6723 PetscCall(PetscBLASIntCast(max_constraints,&Blas_K)); 6724 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6725 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6726 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6727 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6728 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work)); 6729 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work)); 6730 /* array to store rhs and solution of triangular solver */ 6731 PetscCall(PetscMalloc1(max_constraints*max_constraints,&trs_rhs)); 6732 /* allocating workspace for check */ 6733 if (pcbddc->dbg_flag) { 6734 PetscCall(PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work)); 6735 } 6736 } 6737 /* array to store whether a node is primal or not */ 6738 PetscCall(PetscBTCreate(pcis->n_B,&is_primal)); 6739 PetscCall(PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B)); 6740 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B)); 6741 PetscCheckFalse(i != total_primal_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6742 for (i=0;i<total_primal_vertices;i++) { 6743 PetscCall(PetscBTSet(is_primal,aux_primal_numbering_B[i])); 6744 } 6745 PetscCall(PetscFree(aux_primal_numbering_B)); 6746 6747 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6748 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6749 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6750 if (PetscBTLookup(change_basis,total_counts)) { 6751 /* get constraint info */ 6752 primal_dofs = constraints_n[total_counts]; 6753 dual_dofs = size_of_constraint-primal_dofs; 6754 6755 if (pcbddc->dbg_flag) { 6756 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint)); 6757 } 6758 6759 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6760 6761 /* copy quadrature constraints for change of basis check */ 6762 if (pcbddc->dbg_flag) { 6763 PetscCall(PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6764 } 6765 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6766 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6767 6768 /* compute QR decomposition of constraints */ 6769 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6770 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6771 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6772 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6773 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6774 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6775 PetscCall(PetscFPTrapPop()); 6776 6777 /* explicitly compute R^-T */ 6778 PetscCall(PetscArrayzero(trs_rhs,primal_dofs*primal_dofs)); 6779 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6780 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6781 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_NRHS)); 6782 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6783 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6784 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6785 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6786 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6787 PetscCall(PetscFPTrapPop()); 6788 6789 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6790 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6791 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6792 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6793 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6794 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6795 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6796 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6797 PetscCall(PetscFPTrapPop()); 6798 6799 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6800 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6801 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6802 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6803 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6804 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6805 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6806 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6807 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6808 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6809 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6810 PetscCall(PetscFPTrapPop()); 6811 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6812 6813 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6814 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6815 /* insert cols for primal dofs */ 6816 for (j=0;j<primal_dofs;j++) { 6817 start_vals = &qr_basis[j*size_of_constraint]; 6818 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6819 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6820 } 6821 /* insert cols for dual dofs */ 6822 for (j=0,k=0;j<dual_dofs;k++) { 6823 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6824 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6825 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6826 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6827 j++; 6828 } 6829 } 6830 6831 /* check change of basis */ 6832 if (pcbddc->dbg_flag) { 6833 PetscInt ii,jj; 6834 PetscBool valid_qr=PETSC_TRUE; 6835 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_M)); 6836 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6837 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_K)); 6838 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6839 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDB)); 6840 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDC)); 6841 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6842 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6843 PetscCall(PetscFPTrapPop()); 6844 for (jj=0;jj<size_of_constraint;jj++) { 6845 for (ii=0;ii<primal_dofs;ii++) { 6846 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6847 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6848 } 6849 } 6850 if (!valid_qr) { 6851 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n")); 6852 for (jj=0;jj<size_of_constraint;jj++) { 6853 for (ii=0;ii<primal_dofs;ii++) { 6854 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6855 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6856 } 6857 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6858 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6859 } 6860 } 6861 } 6862 } else { 6863 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n")); 6864 } 6865 } 6866 } else { /* simple transformation block */ 6867 PetscInt row,col; 6868 PetscScalar val,norm; 6869 6870 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6871 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6872 for (j=0;j<size_of_constraint;j++) { 6873 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6874 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6875 if (!PetscBTLookup(is_primal,row_B)) { 6876 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6877 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES)); 6878 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES)); 6879 } else { 6880 for (k=0;k<size_of_constraint;k++) { 6881 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6882 if (row != col) { 6883 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6884 } else { 6885 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6886 } 6887 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES)); 6888 } 6889 } 6890 } 6891 if (pcbddc->dbg_flag) { 6892 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n")); 6893 } 6894 } 6895 } else { 6896 if (pcbddc->dbg_flag) { 6897 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint)); 6898 } 6899 } 6900 } 6901 6902 /* free workspace */ 6903 if (qr_needed) { 6904 if (pcbddc->dbg_flag) { 6905 PetscCall(PetscFree(dbg_work)); 6906 } 6907 PetscCall(PetscFree(trs_rhs)); 6908 PetscCall(PetscFree(qr_tau)); 6909 PetscCall(PetscFree(qr_work)); 6910 PetscCall(PetscFree(gqr_work)); 6911 PetscCall(PetscFree(qr_basis)); 6912 } 6913 PetscCall(PetscBTDestroy(&is_primal)); 6914 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6915 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6916 6917 /* assembling of global change of variable */ 6918 if (!pcbddc->fake_change) { 6919 Mat tmat; 6920 PetscInt bs; 6921 6922 PetscCall(VecGetSize(pcis->vec1_global,&global_size)); 6923 PetscCall(VecGetLocalSize(pcis->vec1_global,&local_size)); 6924 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat)); 6925 PetscCall(MatISSetLocalMat(tmat,localChangeOfBasisMatrix)); 6926 PetscCall(MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY)); 6927 PetscCall(MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY)); 6928 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix)); 6929 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ)); 6930 PetscCall(MatGetBlockSize(pc->pmat,&bs)); 6931 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs)); 6932 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size)); 6933 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE)); 6934 PetscCall(MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix)); 6935 PetscCall(MatDestroy(&tmat)); 6936 PetscCall(VecSet(pcis->vec1_global,0.0)); 6937 PetscCall(VecSet(pcis->vec1_N,1.0)); 6938 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6939 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6940 PetscCall(VecReciprocal(pcis->vec1_global)); 6941 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL)); 6942 6943 /* check */ 6944 if (pcbddc->dbg_flag) { 6945 PetscReal error; 6946 Vec x,x_change; 6947 6948 PetscCall(VecDuplicate(pcis->vec1_global,&x)); 6949 PetscCall(VecDuplicate(pcis->vec1_global,&x_change)); 6950 PetscCall(VecSetRandom(x,NULL)); 6951 PetscCall(VecCopy(x,pcis->vec1_global)); 6952 PetscCall(VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6953 PetscCall(VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6954 PetscCall(MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N)); 6955 PetscCall(VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6956 PetscCall(VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6957 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change)); 6958 PetscCall(VecAXPY(x,-1.0,x_change)); 6959 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 6960 if (error > PETSC_SMALL) { 6961 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6962 } 6963 PetscCall(VecDestroy(&x)); 6964 PetscCall(VecDestroy(&x_change)); 6965 } 6966 /* adapt sub_schurs computed (if any) */ 6967 if (pcbddc->use_deluxe_scaling) { 6968 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6969 6970 PetscCheckFalse(pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6971 if (sub_schurs && sub_schurs->S_Ej_all) { 6972 Mat S_new,tmat; 6973 IS is_all_N,is_V_Sall = NULL; 6974 6975 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N)); 6976 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat)); 6977 if (pcbddc->deluxe_zerorows) { 6978 ISLocalToGlobalMapping NtoSall; 6979 IS is_V; 6980 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V)); 6981 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall)); 6982 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall)); 6983 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 6984 PetscCall(ISDestroy(&is_V)); 6985 } 6986 PetscCall(ISDestroy(&is_all_N)); 6987 PetscCall(MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 6988 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 6989 PetscCall(PetscObjectReference((PetscObject)S_new)); 6990 if (pcbddc->deluxe_zerorows) { 6991 const PetscScalar *array; 6992 const PetscInt *idxs_V,*idxs_all; 6993 PetscInt i,n_V; 6994 6995 PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 6996 PetscCall(ISGetLocalSize(is_V_Sall,&n_V)); 6997 PetscCall(ISGetIndices(is_V_Sall,&idxs_V)); 6998 PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs_all)); 6999 PetscCall(VecGetArrayRead(pcis->D,&array)); 7000 for (i=0;i<n_V;i++) { 7001 PetscScalar val; 7002 PetscInt idx; 7003 7004 idx = idxs_V[i]; 7005 val = array[idxs_all[idxs_V[i]]]; 7006 PetscCall(MatSetValue(S_new,idx,idx,val,INSERT_VALUES)); 7007 } 7008 PetscCall(MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY)); 7009 PetscCall(MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY)); 7010 PetscCall(VecRestoreArrayRead(pcis->D,&array)); 7011 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all)); 7012 PetscCall(ISRestoreIndices(is_V_Sall,&idxs_V)); 7013 } 7014 sub_schurs->S_Ej_all = S_new; 7015 PetscCall(MatDestroy(&S_new)); 7016 if (sub_schurs->sum_S_Ej_all) { 7017 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 7018 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7019 PetscCall(PetscObjectReference((PetscObject)S_new)); 7020 if (pcbddc->deluxe_zerorows) { 7021 PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 7022 } 7023 sub_schurs->sum_S_Ej_all = S_new; 7024 PetscCall(MatDestroy(&S_new)); 7025 } 7026 PetscCall(ISDestroy(&is_V_Sall)); 7027 PetscCall(MatDestroy(&tmat)); 7028 } 7029 /* destroy any change of basis context in sub_schurs */ 7030 if (sub_schurs && sub_schurs->change) { 7031 PetscInt i; 7032 7033 for (i=0;i<sub_schurs->n_subs;i++) { 7034 PetscCall(KSPDestroy(&sub_schurs->change[i])); 7035 } 7036 PetscCall(PetscFree(sub_schurs->change)); 7037 } 7038 } 7039 if (pcbddc->switch_static) { /* need to save the local change */ 7040 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7041 } else { 7042 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 7043 } 7044 /* determine if any process has changed the pressures locally */ 7045 pcbddc->change_interior = pcbddc->benign_have_null; 7046 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7047 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7048 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7049 pcbddc->use_qr_single = qr_needed; 7050 } 7051 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7052 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7053 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7054 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7055 } else { 7056 Mat benign_global = NULL; 7057 if (pcbddc->benign_have_null) { 7058 Mat M; 7059 7060 pcbddc->change_interior = PETSC_TRUE; 7061 PetscCall(VecCopy(matis->counter,pcis->vec1_N)); 7062 PetscCall(VecReciprocal(pcis->vec1_N)); 7063 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global)); 7064 if (pcbddc->benign_change) { 7065 PetscCall(MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M)); 7066 PetscCall(MatDiagonalScale(M,pcis->vec1_N,NULL)); 7067 } else { 7068 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M)); 7069 PetscCall(MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES)); 7070 } 7071 PetscCall(MatISSetLocalMat(benign_global,M)); 7072 PetscCall(MatDestroy(&M)); 7073 PetscCall(MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY)); 7074 PetscCall(MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY)); 7075 } 7076 if (pcbddc->user_ChangeOfBasisMatrix) { 7077 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix)); 7078 PetscCall(MatDestroy(&benign_global)); 7079 } else if (pcbddc->benign_have_null) { 7080 pcbddc->ChangeOfBasisMatrix = benign_global; 7081 } 7082 } 7083 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7084 IS is_global; 7085 const PetscInt *gidxs; 7086 7087 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping,&gidxs)); 7088 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global)); 7089 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping,&gidxs)); 7090 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change)); 7091 PetscCall(ISDestroy(&is_global)); 7092 } 7093 } 7094 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7095 PetscCall(VecDuplicate(pcis->vec1_global,&pcbddc->work_change)); 7096 } 7097 7098 if (!pcbddc->fake_change) { 7099 /* add pressure dofs to set of primal nodes for numbering purposes */ 7100 for (i=0;i<pcbddc->benign_n;i++) { 7101 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7102 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7103 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7104 pcbddc->local_primal_size_cc++; 7105 pcbddc->local_primal_size++; 7106 } 7107 7108 /* check if a new primal space has been introduced (also take into account benign trick) */ 7109 pcbddc->new_primal_space_local = PETSC_TRUE; 7110 if (olocal_primal_size == pcbddc->local_primal_size) { 7111 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7112 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7113 if (!pcbddc->new_primal_space_local) { 7114 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7115 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7116 } 7117 } 7118 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7119 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 7120 } 7121 PetscCall(PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult)); 7122 7123 /* flush dbg viewer */ 7124 if (pcbddc->dbg_flag) { 7125 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7126 } 7127 7128 /* free workspace */ 7129 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7130 PetscCall(PetscBTDestroy(&change_basis)); 7131 if (!pcbddc->adaptive_selection) { 7132 PetscCall(PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n)); 7133 PetscCall(PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B)); 7134 } else { 7135 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 7136 pcbddc->adaptive_constraints_idxs_ptr, 7137 pcbddc->adaptive_constraints_data_ptr, 7138 pcbddc->adaptive_constraints_idxs, 7139 pcbddc->adaptive_constraints_data);PetscCall(ierr); 7140 PetscCall(PetscFree(constraints_n)); 7141 PetscCall(PetscFree(constraints_idxs_B)); 7142 } 7143 PetscFunctionReturn(0); 7144 } 7145 7146 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7147 { 7148 ISLocalToGlobalMapping map; 7149 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7150 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7151 PetscInt i,N; 7152 PetscBool rcsr = PETSC_FALSE; 7153 7154 PetscFunctionBegin; 7155 if (pcbddc->recompute_topography) { 7156 pcbddc->graphanalyzed = PETSC_FALSE; 7157 /* Reset previously computed graph */ 7158 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7159 /* Init local Graph struct */ 7160 PetscCall(MatGetSize(pc->pmat,&N,NULL)); 7161 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&map,NULL)); 7162 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount)); 7163 7164 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7165 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local)); 7166 } 7167 /* Check validity of the csr graph passed in by the user */ 7168 PetscCheckFalse(pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7169 7170 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7171 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7172 PetscInt *xadj,*adjncy; 7173 PetscInt nvtxs; 7174 PetscBool flg_row=PETSC_FALSE; 7175 7176 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7177 if (flg_row) { 7178 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES)); 7179 pcbddc->computed_rowadj = PETSC_TRUE; 7180 } 7181 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7182 rcsr = PETSC_TRUE; 7183 } 7184 if (pcbddc->dbg_flag) { 7185 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7186 } 7187 7188 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7189 PetscReal *lcoords; 7190 PetscInt n; 7191 MPI_Datatype dimrealtype; 7192 7193 /* TODO: support for blocked */ 7194 PetscCheckFalse(pcbddc->mat_graph->cnloc != pc->pmat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7195 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7196 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords)); 7197 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype)); 7198 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7199 PetscCall(PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7200 PetscCall(PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7201 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7202 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7203 7204 pcbddc->mat_graph->coords = lcoords; 7205 pcbddc->mat_graph->cloc = PETSC_TRUE; 7206 pcbddc->mat_graph->cnloc = n; 7207 } 7208 PetscCheckFalse(pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7209 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7210 7211 /* Setup of Graph */ 7212 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7213 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local)); 7214 7215 /* attach info on disconnected subdomains if present */ 7216 if (pcbddc->n_local_subs) { 7217 PetscInt *local_subs,n,totn; 7218 7219 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7220 PetscCall(PetscMalloc1(n,&local_subs)); 7221 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7222 for (i=0;i<pcbddc->n_local_subs;i++) { 7223 const PetscInt *idxs; 7224 PetscInt nl,j; 7225 7226 PetscCall(ISGetLocalSize(pcbddc->local_subs[i],&nl)); 7227 PetscCall(ISGetIndices(pcbddc->local_subs[i],&idxs)); 7228 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7229 PetscCall(ISRestoreIndices(pcbddc->local_subs[i],&idxs)); 7230 } 7231 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7232 pcbddc->mat_graph->n_local_subs = totn + 1; 7233 pcbddc->mat_graph->local_subs = local_subs; 7234 } 7235 } 7236 7237 if (!pcbddc->graphanalyzed) { 7238 /* Graph's connected components analysis */ 7239 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7240 pcbddc->graphanalyzed = PETSC_TRUE; 7241 pcbddc->corner_selected = pcbddc->corner_selection; 7242 } 7243 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7244 PetscFunctionReturn(0); 7245 } 7246 7247 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7248 { 7249 PetscInt i,j,n; 7250 PetscScalar *alphas; 7251 PetscReal norm,*onorms; 7252 7253 PetscFunctionBegin; 7254 n = *nio; 7255 if (!n) PetscFunctionReturn(0); 7256 PetscCall(PetscMalloc2(n,&alphas,n,&onorms)); 7257 PetscCall(VecNormalize(vecs[0],&norm)); 7258 if (norm < PETSC_SMALL) { 7259 onorms[0] = 0.0; 7260 PetscCall(VecSet(vecs[0],0.0)); 7261 } else { 7262 onorms[0] = norm; 7263 } 7264 7265 for (i=1;i<n;i++) { 7266 PetscCall(VecMDot(vecs[i],i,vecs,alphas)); 7267 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7268 PetscCall(VecMAXPY(vecs[i],i,alphas,vecs)); 7269 PetscCall(VecNormalize(vecs[i],&norm)); 7270 if (norm < PETSC_SMALL) { 7271 onorms[i] = 0.0; 7272 PetscCall(VecSet(vecs[i],0.0)); 7273 } else { 7274 onorms[i] = norm; 7275 } 7276 } 7277 /* push nonzero vectors at the beginning */ 7278 for (i=0;i<n;i++) { 7279 if (onorms[i] == 0.0) { 7280 for (j=i+1;j<n;j++) { 7281 if (onorms[j] != 0.0) { 7282 PetscCall(VecCopy(vecs[j],vecs[i])); 7283 onorms[j] = 0.0; 7284 } 7285 } 7286 } 7287 } 7288 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7289 PetscCall(PetscFree2(alphas,onorms)); 7290 PetscFunctionReturn(0); 7291 } 7292 7293 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7294 { 7295 ISLocalToGlobalMapping mapping; 7296 Mat A; 7297 PetscInt n_neighs,*neighs,*n_shared,**shared; 7298 PetscMPIInt size,rank,color; 7299 PetscInt *xadj,*adjncy; 7300 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7301 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7302 PetscInt void_procs,*procs_candidates = NULL; 7303 PetscInt xadj_count,*count; 7304 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7305 PetscSubcomm psubcomm; 7306 MPI_Comm subcomm; 7307 7308 PetscFunctionBegin; 7309 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7310 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7311 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7312 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7313 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7314 PetscCheckFalse(*n_subdomains <=0,PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7315 7316 if (have_void) *have_void = PETSC_FALSE; 7317 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size)); 7318 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 7319 PetscCall(MatISGetLocalMat(mat,&A)); 7320 PetscCall(MatGetLocalSize(A,&n,NULL)); 7321 im_active = !!n; 7322 PetscCall(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat))); 7323 void_procs = size - active_procs; 7324 /* get ranks of of non-active processes in mat communicator */ 7325 if (void_procs) { 7326 PetscInt ncand; 7327 7328 if (have_void) *have_void = PETSC_TRUE; 7329 PetscCall(PetscMalloc1(size,&procs_candidates)); 7330 PetscCallMPI(MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 7331 for (i=0,ncand=0;i<size;i++) { 7332 if (!procs_candidates[i]) { 7333 procs_candidates[ncand++] = i; 7334 } 7335 } 7336 /* force n_subdomains to be not greater that the number of non-active processes */ 7337 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7338 } 7339 7340 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7341 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7342 PetscCall(MatGetSize(mat,&N,NULL)); 7343 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7344 PetscInt issize,isidx,dest; 7345 if (*n_subdomains == 1) dest = 0; 7346 else dest = rank; 7347 if (im_active) { 7348 issize = 1; 7349 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7350 isidx = procs_candidates[dest]; 7351 } else { 7352 isidx = dest; 7353 } 7354 } else { 7355 issize = 0; 7356 isidx = -1; 7357 } 7358 if (*n_subdomains != 1) *n_subdomains = active_procs; 7359 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends)); 7360 PetscCall(PetscFree(procs_candidates)); 7361 PetscFunctionReturn(0); 7362 } 7363 PetscCall(PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL)); 7364 PetscCall(PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL)); 7365 threshold = PetscMax(threshold,2); 7366 7367 /* Get info on mapping */ 7368 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7369 PetscCall(ISLocalToGlobalMappingGetInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7370 7371 /* build local CSR graph of subdomains' connectivity */ 7372 PetscCall(PetscMalloc1(2,&xadj)); 7373 xadj[0] = 0; 7374 xadj[1] = PetscMax(n_neighs-1,0); 7375 PetscCall(PetscMalloc1(xadj[1],&adjncy)); 7376 PetscCall(PetscMalloc1(xadj[1],&adjncy_wgt)); 7377 PetscCall(PetscCalloc1(n,&count)); 7378 for (i=1;i<n_neighs;i++) 7379 for (j=0;j<n_shared[i];j++) 7380 count[shared[i][j]] += 1; 7381 7382 xadj_count = 0; 7383 for (i=1;i<n_neighs;i++) { 7384 for (j=0;j<n_shared[i];j++) { 7385 if (count[shared[i][j]] < threshold) { 7386 adjncy[xadj_count] = neighs[i]; 7387 adjncy_wgt[xadj_count] = n_shared[i]; 7388 xadj_count++; 7389 break; 7390 } 7391 } 7392 } 7393 xadj[1] = xadj_count; 7394 PetscCall(PetscFree(count)); 7395 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7396 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7397 7398 PetscCall(PetscMalloc1(1,&ranks_send_to_idx)); 7399 7400 /* Restrict work on active processes only */ 7401 PetscCall(PetscMPIIntCast(im_active,&color)); 7402 if (void_procs) { 7403 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm)); 7404 PetscCall(PetscSubcommSetNumber(psubcomm,2)); /* 2 groups, active process and not active processes */ 7405 PetscCall(PetscSubcommSetTypeGeneral(psubcomm,color,rank)); 7406 subcomm = PetscSubcommChild(psubcomm); 7407 } else { 7408 psubcomm = NULL; 7409 subcomm = PetscObjectComm((PetscObject)mat); 7410 } 7411 7412 v_wgt = NULL; 7413 if (!color) { 7414 PetscCall(PetscFree(xadj)); 7415 PetscCall(PetscFree(adjncy)); 7416 PetscCall(PetscFree(adjncy_wgt)); 7417 } else { 7418 Mat subdomain_adj; 7419 IS new_ranks,new_ranks_contig; 7420 MatPartitioning partitioner; 7421 PetscInt rstart=0,rend=0; 7422 PetscInt *is_indices,*oldranks; 7423 PetscMPIInt size; 7424 PetscBool aggregate; 7425 7426 PetscCallMPI(MPI_Comm_size(subcomm,&size)); 7427 if (void_procs) { 7428 PetscInt prank = rank; 7429 PetscCall(PetscMalloc1(size,&oldranks)); 7430 PetscCallMPI(MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm)); 7431 for (i=0;i<xadj[1];i++) { 7432 PetscCall(PetscFindInt(adjncy[i],size,oldranks,&adjncy[i])); 7433 } 7434 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7435 } else { 7436 oldranks = NULL; 7437 } 7438 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7439 if (aggregate) { /* TODO: all this part could be made more efficient */ 7440 PetscInt lrows,row,ncols,*cols; 7441 PetscMPIInt nrank; 7442 PetscScalar *vals; 7443 7444 PetscCallMPI(MPI_Comm_rank(subcomm,&nrank)); 7445 lrows = 0; 7446 if (nrank<redprocs) { 7447 lrows = size/redprocs; 7448 if (nrank<size%redprocs) lrows++; 7449 } 7450 PetscCall(MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj)); 7451 PetscCall(MatGetOwnershipRange(subdomain_adj,&rstart,&rend)); 7452 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 7453 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 7454 row = nrank; 7455 ncols = xadj[1]-xadj[0]; 7456 cols = adjncy; 7457 PetscCall(PetscMalloc1(ncols,&vals)); 7458 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7459 PetscCall(MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES)); 7460 PetscCall(MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7461 PetscCall(MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7462 PetscCall(PetscFree(xadj)); 7463 PetscCall(PetscFree(adjncy)); 7464 PetscCall(PetscFree(adjncy_wgt)); 7465 PetscCall(PetscFree(vals)); 7466 if (use_vwgt) { 7467 Vec v; 7468 const PetscScalar *array; 7469 PetscInt nl; 7470 7471 PetscCall(MatCreateVecs(subdomain_adj,&v,NULL)); 7472 PetscCall(VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES)); 7473 PetscCall(VecAssemblyBegin(v)); 7474 PetscCall(VecAssemblyEnd(v)); 7475 PetscCall(VecGetLocalSize(v,&nl)); 7476 PetscCall(VecGetArrayRead(v,&array)); 7477 PetscCall(PetscMalloc1(nl,&v_wgt)); 7478 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7479 PetscCall(VecRestoreArrayRead(v,&array)); 7480 PetscCall(VecDestroy(&v)); 7481 } 7482 } else { 7483 PetscCall(MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj)); 7484 if (use_vwgt) { 7485 PetscCall(PetscMalloc1(1,&v_wgt)); 7486 v_wgt[0] = n; 7487 } 7488 } 7489 /* PetscCall(MatView(subdomain_adj,0)); */ 7490 7491 /* Partition */ 7492 PetscCall(MatPartitioningCreate(subcomm,&partitioner)); 7493 #if defined(PETSC_HAVE_PTSCOTCH) 7494 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH)); 7495 #elif defined(PETSC_HAVE_PARMETIS) 7496 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS)); 7497 #else 7498 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE)); 7499 #endif 7500 PetscCall(MatPartitioningSetAdjacency(partitioner,subdomain_adj)); 7501 if (v_wgt) { 7502 PetscCall(MatPartitioningSetVertexWeights(partitioner,v_wgt)); 7503 } 7504 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7505 PetscCall(MatPartitioningSetNParts(partitioner,*n_subdomains)); 7506 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7507 PetscCall(MatPartitioningApply(partitioner,&new_ranks)); 7508 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7509 7510 /* renumber new_ranks to avoid "holes" in new set of processors */ 7511 PetscCall(ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig)); 7512 PetscCall(ISDestroy(&new_ranks)); 7513 PetscCall(ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7514 if (!aggregate) { 7515 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7516 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7517 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7518 } else if (oldranks) { 7519 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7520 } else { 7521 ranks_send_to_idx[0] = is_indices[0]; 7522 } 7523 } else { 7524 PetscInt idx = 0; 7525 PetscMPIInt tag; 7526 MPI_Request *reqs; 7527 7528 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag)); 7529 PetscCall(PetscMalloc1(rend-rstart,&reqs)); 7530 for (i=rstart;i<rend;i++) { 7531 PetscCallMPI(MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart])); 7532 } 7533 PetscCallMPI(MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE)); 7534 PetscCallMPI(MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE)); 7535 PetscCall(PetscFree(reqs)); 7536 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7537 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7538 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7539 } else if (oldranks) { 7540 ranks_send_to_idx[0] = oldranks[idx]; 7541 } else { 7542 ranks_send_to_idx[0] = idx; 7543 } 7544 } 7545 PetscCall(ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7546 /* clean up */ 7547 PetscCall(PetscFree(oldranks)); 7548 PetscCall(ISDestroy(&new_ranks_contig)); 7549 PetscCall(MatDestroy(&subdomain_adj)); 7550 PetscCall(MatPartitioningDestroy(&partitioner)); 7551 } 7552 PetscCall(PetscSubcommDestroy(&psubcomm)); 7553 PetscCall(PetscFree(procs_candidates)); 7554 7555 /* assemble parallel IS for sends */ 7556 i = 1; 7557 if (!color) i=0; 7558 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends)); 7559 PetscFunctionReturn(0); 7560 } 7561 7562 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7563 7564 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7565 { 7566 Mat local_mat; 7567 IS is_sends_internal; 7568 PetscInt rows,cols,new_local_rows; 7569 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7570 PetscBool ismatis,isdense,newisdense,destroy_mat; 7571 ISLocalToGlobalMapping l2gmap; 7572 PetscInt* l2gmap_indices; 7573 const PetscInt* is_indices; 7574 MatType new_local_type; 7575 /* buffers */ 7576 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7577 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7578 PetscInt *recv_buffer_idxs_local; 7579 PetscScalar *ptr_vals,*recv_buffer_vals; 7580 const PetscScalar *send_buffer_vals; 7581 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7582 /* MPI */ 7583 MPI_Comm comm,comm_n; 7584 PetscSubcomm subcomm; 7585 PetscMPIInt n_sends,n_recvs,size; 7586 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7587 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7588 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7589 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7590 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7591 7592 PetscFunctionBegin; 7593 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7594 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7595 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7596 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7597 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7598 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7599 PetscValidLogicalCollectiveBool(mat,reuse,6); 7600 PetscValidLogicalCollectiveInt(mat,nis,8); 7601 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7602 if (nvecs) { 7603 PetscCheckFalse(nvecs > 1,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7604 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7605 } 7606 /* further checks */ 7607 PetscCall(MatISGetLocalMat(mat,&local_mat)); 7608 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense)); 7609 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7610 PetscCall(MatGetSize(local_mat,&rows,&cols)); 7611 PetscCheck(rows == cols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7612 if (reuse && *mat_n) { 7613 PetscInt mrows,mcols,mnrows,mncols; 7614 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7615 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis)); 7616 PetscCheck(ismatis,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7617 PetscCall(MatGetSize(mat,&mrows,&mcols)); 7618 PetscCall(MatGetSize(*mat_n,&mnrows,&mncols)); 7619 PetscCheck(mrows == mnrows,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7620 PetscCheck(mcols == mncols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7621 } 7622 PetscCall(MatGetBlockSize(local_mat,&bs)); 7623 PetscValidLogicalCollectiveInt(mat,bs,1); 7624 7625 /* prepare IS for sending if not provided */ 7626 if (!is_sends) { 7627 PetscCheck(n_subdomains,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7628 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL)); 7629 } else { 7630 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7631 is_sends_internal = is_sends; 7632 } 7633 7634 /* get comm */ 7635 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 7636 7637 /* compute number of sends */ 7638 PetscCall(ISGetLocalSize(is_sends_internal,&i)); 7639 PetscCall(PetscMPIIntCast(i,&n_sends)); 7640 7641 /* compute number of receives */ 7642 PetscCallMPI(MPI_Comm_size(comm,&size)); 7643 PetscCall(PetscMalloc1(size,&iflags)); 7644 PetscCall(PetscArrayzero(iflags,size)); 7645 PetscCall(ISGetIndices(is_sends_internal,&is_indices)); 7646 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7647 PetscCall(PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs)); 7648 PetscCall(PetscFree(iflags)); 7649 7650 /* restrict comm if requested */ 7651 subcomm = NULL; 7652 destroy_mat = PETSC_FALSE; 7653 if (restrict_comm) { 7654 PetscMPIInt color,subcommsize; 7655 7656 color = 0; 7657 if (restrict_full) { 7658 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7659 } else { 7660 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7661 } 7662 PetscCall(MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm)); 7663 subcommsize = size - subcommsize; 7664 /* check if reuse has been requested */ 7665 if (reuse) { 7666 if (*mat_n) { 7667 PetscMPIInt subcommsize2; 7668 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2)); 7669 PetscCheck(subcommsize == subcommsize2,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7670 comm_n = PetscObjectComm((PetscObject)*mat_n); 7671 } else { 7672 comm_n = PETSC_COMM_SELF; 7673 } 7674 } else { /* MAT_INITIAL_MATRIX */ 7675 PetscMPIInt rank; 7676 7677 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 7678 PetscCall(PetscSubcommCreate(comm,&subcomm)); 7679 PetscCall(PetscSubcommSetNumber(subcomm,2)); 7680 PetscCall(PetscSubcommSetTypeGeneral(subcomm,color,rank)); 7681 comm_n = PetscSubcommChild(subcomm); 7682 } 7683 /* flag to destroy *mat_n if not significative */ 7684 if (color) destroy_mat = PETSC_TRUE; 7685 } else { 7686 comm_n = comm; 7687 } 7688 7689 /* prepare send/receive buffers */ 7690 PetscCall(PetscMalloc1(size,&ilengths_idxs)); 7691 PetscCall(PetscArrayzero(ilengths_idxs,size)); 7692 PetscCall(PetscMalloc1(size,&ilengths_vals)); 7693 PetscCall(PetscArrayzero(ilengths_vals,size)); 7694 if (nis) { 7695 PetscCall(PetscCalloc1(size,&ilengths_idxs_is)); 7696 } 7697 7698 /* Get data from local matrices */ 7699 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7700 /* TODO: See below some guidelines on how to prepare the local buffers */ 7701 /* 7702 send_buffer_vals should contain the raw values of the local matrix 7703 send_buffer_idxs should contain: 7704 - MatType_PRIVATE type 7705 - PetscInt size_of_l2gmap 7706 - PetscInt global_row_indices[size_of_l2gmap] 7707 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7708 */ 7709 { 7710 ISLocalToGlobalMapping mapping; 7711 7712 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7713 PetscCall(MatDenseGetArrayRead(local_mat,&send_buffer_vals)); 7714 PetscCall(ISLocalToGlobalMappingGetSize(mapping,&i)); 7715 PetscCall(PetscMalloc1(i+2,&send_buffer_idxs)); 7716 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7717 send_buffer_idxs[1] = i; 7718 PetscCall(ISLocalToGlobalMappingGetIndices(mapping,(const PetscInt**)&ptr_idxs)); 7719 PetscCall(PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i)); 7720 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping,(const PetscInt**)&ptr_idxs)); 7721 PetscCall(PetscMPIIntCast(i,&len)); 7722 for (i=0;i<n_sends;i++) { 7723 ilengths_vals[is_indices[i]] = len*len; 7724 ilengths_idxs[is_indices[i]] = len+2; 7725 } 7726 } 7727 PetscCall(PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals)); 7728 /* additional is (if any) */ 7729 if (nis) { 7730 PetscMPIInt psum; 7731 PetscInt j; 7732 for (j=0,psum=0;j<nis;j++) { 7733 PetscInt plen; 7734 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7735 PetscCall(PetscMPIIntCast(plen,&len)); 7736 psum += len+1; /* indices + lenght */ 7737 } 7738 PetscCall(PetscMalloc1(psum,&send_buffer_idxs_is)); 7739 for (j=0,psum=0;j<nis;j++) { 7740 PetscInt plen; 7741 const PetscInt *is_array_idxs; 7742 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7743 send_buffer_idxs_is[psum] = plen; 7744 PetscCall(ISGetIndices(isarray[j],&is_array_idxs)); 7745 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen)); 7746 PetscCall(ISRestoreIndices(isarray[j],&is_array_idxs)); 7747 psum += plen+1; /* indices + lenght */ 7748 } 7749 for (i=0;i<n_sends;i++) { 7750 ilengths_idxs_is[is_indices[i]] = psum; 7751 } 7752 PetscCall(PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is)); 7753 } 7754 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 7755 7756 buf_size_idxs = 0; 7757 buf_size_vals = 0; 7758 buf_size_idxs_is = 0; 7759 buf_size_vecs = 0; 7760 for (i=0;i<n_recvs;i++) { 7761 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7762 buf_size_vals += (PetscInt)olengths_vals[i]; 7763 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7764 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7765 } 7766 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs)); 7767 PetscCall(PetscMalloc1(buf_size_vals,&recv_buffer_vals)); 7768 PetscCall(PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is)); 7769 PetscCall(PetscMalloc1(buf_size_vecs,&recv_buffer_vecs)); 7770 7771 /* get new tags for clean communications */ 7772 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs)); 7773 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vals)); 7774 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is)); 7775 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vecs)); 7776 7777 /* allocate for requests */ 7778 PetscCall(PetscMalloc1(n_sends,&send_req_idxs)); 7779 PetscCall(PetscMalloc1(n_sends,&send_req_vals)); 7780 PetscCall(PetscMalloc1(n_sends,&send_req_idxs_is)); 7781 PetscCall(PetscMalloc1(n_sends,&send_req_vecs)); 7782 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs)); 7783 PetscCall(PetscMalloc1(n_recvs,&recv_req_vals)); 7784 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs_is)); 7785 PetscCall(PetscMalloc1(n_recvs,&recv_req_vecs)); 7786 7787 /* communications */ 7788 ptr_idxs = recv_buffer_idxs; 7789 ptr_vals = recv_buffer_vals; 7790 ptr_idxs_is = recv_buffer_idxs_is; 7791 ptr_vecs = recv_buffer_vecs; 7792 for (i=0;i<n_recvs;i++) { 7793 source_dest = onodes[i]; 7794 PetscCallMPI(MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i])); 7795 PetscCallMPI(MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i])); 7796 ptr_idxs += olengths_idxs[i]; 7797 ptr_vals += olengths_vals[i]; 7798 if (nis) { 7799 source_dest = onodes_is[i]; 7800 PetscCallMPI(MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i])); 7801 ptr_idxs_is += olengths_idxs_is[i]; 7802 } 7803 if (nvecs) { 7804 source_dest = onodes[i]; 7805 PetscCallMPI(MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i])); 7806 ptr_vecs += olengths_idxs[i]-2; 7807 } 7808 } 7809 for (i=0;i<n_sends;i++) { 7810 PetscCall(PetscMPIIntCast(is_indices[i],&source_dest)); 7811 PetscCallMPI(MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i])); 7812 PetscCallMPI(MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i])); 7813 if (nis) { 7814 PetscCallMPI(MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i])); 7815 } 7816 if (nvecs) { 7817 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 7818 PetscCallMPI(MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i])); 7819 } 7820 } 7821 PetscCall(ISRestoreIndices(is_sends_internal,&is_indices)); 7822 PetscCall(ISDestroy(&is_sends_internal)); 7823 7824 /* assemble new l2g map */ 7825 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE)); 7826 ptr_idxs = recv_buffer_idxs; 7827 new_local_rows = 0; 7828 for (i=0;i<n_recvs;i++) { 7829 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7830 ptr_idxs += olengths_idxs[i]; 7831 } 7832 PetscCall(PetscMalloc1(new_local_rows,&l2gmap_indices)); 7833 ptr_idxs = recv_buffer_idxs; 7834 new_local_rows = 0; 7835 for (i=0;i<n_recvs;i++) { 7836 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1))); 7837 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7838 ptr_idxs += olengths_idxs[i]; 7839 } 7840 PetscCall(PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices)); 7841 PetscCall(ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap)); 7842 PetscCall(PetscFree(l2gmap_indices)); 7843 7844 /* infer new local matrix type from received local matrices type */ 7845 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7846 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7847 if (n_recvs) { 7848 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7849 ptr_idxs = recv_buffer_idxs; 7850 for (i=0;i<n_recvs;i++) { 7851 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7852 new_local_type_private = MATAIJ_PRIVATE; 7853 break; 7854 } 7855 ptr_idxs += olengths_idxs[i]; 7856 } 7857 switch (new_local_type_private) { 7858 case MATDENSE_PRIVATE: 7859 new_local_type = MATSEQAIJ; 7860 bs = 1; 7861 break; 7862 case MATAIJ_PRIVATE: 7863 new_local_type = MATSEQAIJ; 7864 bs = 1; 7865 break; 7866 case MATBAIJ_PRIVATE: 7867 new_local_type = MATSEQBAIJ; 7868 break; 7869 case MATSBAIJ_PRIVATE: 7870 new_local_type = MATSEQSBAIJ; 7871 break; 7872 default: 7873 SETERRQ(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7874 } 7875 } else { /* by default, new_local_type is seqaij */ 7876 new_local_type = MATSEQAIJ; 7877 bs = 1; 7878 } 7879 7880 /* create MATIS object if needed */ 7881 if (!reuse) { 7882 PetscCall(MatGetSize(mat,&rows,&cols)); 7883 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7884 } else { 7885 /* it also destroys the local matrices */ 7886 if (*mat_n) { 7887 PetscCall(MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap)); 7888 } else { /* this is a fake object */ 7889 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7890 } 7891 } 7892 PetscCall(MatISGetLocalMat(*mat_n,&local_mat)); 7893 PetscCall(MatSetType(local_mat,new_local_type)); 7894 7895 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE)); 7896 7897 /* Global to local map of received indices */ 7898 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local)); /* needed for values insertion */ 7899 PetscCall(ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local)); 7900 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 7901 7902 /* restore attributes -> type of incoming data and its size */ 7903 buf_size_idxs = 0; 7904 for (i=0;i<n_recvs;i++) { 7905 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7906 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7907 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7908 } 7909 PetscCall(PetscFree(recv_buffer_idxs)); 7910 7911 /* set preallocation */ 7912 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense)); 7913 if (!newisdense) { 7914 PetscInt *new_local_nnz=NULL; 7915 7916 ptr_idxs = recv_buffer_idxs_local; 7917 if (n_recvs) { 7918 PetscCall(PetscCalloc1(new_local_rows,&new_local_nnz)); 7919 } 7920 for (i=0;i<n_recvs;i++) { 7921 PetscInt j; 7922 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7923 for (j=0;j<*(ptr_idxs+1);j++) { 7924 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7925 } 7926 } else { 7927 /* TODO */ 7928 } 7929 ptr_idxs += olengths_idxs[i]; 7930 } 7931 if (new_local_nnz) { 7932 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7933 PetscCall(MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz)); 7934 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7935 PetscCall(MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7936 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7937 PetscCall(MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7938 } else { 7939 PetscCall(MatSetUp(local_mat)); 7940 } 7941 PetscCall(PetscFree(new_local_nnz)); 7942 } else { 7943 PetscCall(MatSetUp(local_mat)); 7944 } 7945 7946 /* set values */ 7947 ptr_vals = recv_buffer_vals; 7948 ptr_idxs = recv_buffer_idxs_local; 7949 for (i=0;i<n_recvs;i++) { 7950 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7951 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE)); 7952 PetscCall(MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES)); 7953 PetscCall(MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY)); 7954 PetscCall(MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY)); 7955 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE)); 7956 } else { 7957 /* TODO */ 7958 } 7959 ptr_idxs += olengths_idxs[i]; 7960 ptr_vals += olengths_vals[i]; 7961 } 7962 PetscCall(MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY)); 7963 PetscCall(MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY)); 7964 PetscCall(MatISRestoreLocalMat(*mat_n,&local_mat)); 7965 PetscCall(MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY)); 7966 PetscCall(MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY)); 7967 PetscCall(PetscFree(recv_buffer_vals)); 7968 7969 #if 0 7970 if (!restrict_comm) { /* check */ 7971 Vec lvec,rvec; 7972 PetscReal infty_error; 7973 7974 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 7975 PetscCall(VecSetRandom(rvec,NULL)); 7976 PetscCall(MatMult(mat,rvec,lvec)); 7977 PetscCall(VecScale(lvec,-1.0)); 7978 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7979 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7980 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7981 PetscCall(VecDestroy(&rvec)); 7982 PetscCall(VecDestroy(&lvec)); 7983 } 7984 #endif 7985 7986 /* assemble new additional is (if any) */ 7987 if (nis) { 7988 PetscInt **temp_idxs,*count_is,j,psum; 7989 7990 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE)); 7991 PetscCall(PetscCalloc1(nis,&count_is)); 7992 ptr_idxs = recv_buffer_idxs_is; 7993 psum = 0; 7994 for (i=0;i<n_recvs;i++) { 7995 for (j=0;j<nis;j++) { 7996 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7997 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7998 psum += plen; 7999 ptr_idxs += plen+1; /* shift pointer to received data */ 8000 } 8001 } 8002 PetscCall(PetscMalloc1(nis,&temp_idxs)); 8003 PetscCall(PetscMalloc1(psum,&temp_idxs[0])); 8004 for (i=1;i<nis;i++) { 8005 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 8006 } 8007 PetscCall(PetscArrayzero(count_is,nis)); 8008 ptr_idxs = recv_buffer_idxs_is; 8009 for (i=0;i<n_recvs;i++) { 8010 for (j=0;j<nis;j++) { 8011 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8012 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen)); 8013 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8014 ptr_idxs += plen+1; /* shift pointer to received data */ 8015 } 8016 } 8017 for (i=0;i<nis;i++) { 8018 PetscCall(ISDestroy(&isarray[i])); 8019 PetscCall(PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i])); 8020 PetscCall(ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i])); 8021 } 8022 PetscCall(PetscFree(count_is)); 8023 PetscCall(PetscFree(temp_idxs[0])); 8024 PetscCall(PetscFree(temp_idxs)); 8025 } 8026 /* free workspace */ 8027 PetscCall(PetscFree(recv_buffer_idxs_is)); 8028 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE)); 8029 PetscCall(PetscFree(send_buffer_idxs)); 8030 PetscCallMPI(MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE)); 8031 if (isdense) { 8032 PetscCall(MatISGetLocalMat(mat,&local_mat)); 8033 PetscCall(MatDenseRestoreArrayRead(local_mat,&send_buffer_vals)); 8034 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 8035 } else { 8036 /* PetscCall(PetscFree(send_buffer_vals)); */ 8037 } 8038 if (nis) { 8039 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE)); 8040 PetscCall(PetscFree(send_buffer_idxs_is)); 8041 } 8042 8043 if (nvecs) { 8044 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE)); 8045 PetscCallMPI(MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE)); 8046 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 8047 PetscCall(VecDestroy(&nnsp_vec[0])); 8048 PetscCall(VecCreate(comm_n,&nnsp_vec[0])); 8049 PetscCall(VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE)); 8050 PetscCall(VecSetType(nnsp_vec[0],VECSTANDARD)); 8051 /* set values */ 8052 ptr_vals = recv_buffer_vecs; 8053 ptr_idxs = recv_buffer_idxs_local; 8054 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 8055 for (i=0;i<n_recvs;i++) { 8056 PetscInt j; 8057 for (j=0;j<*(ptr_idxs+1);j++) { 8058 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8059 } 8060 ptr_idxs += olengths_idxs[i]; 8061 ptr_vals += olengths_idxs[i]-2; 8062 } 8063 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 8064 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8065 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8066 } 8067 8068 PetscCall(PetscFree(recv_buffer_vecs)); 8069 PetscCall(PetscFree(recv_buffer_idxs_local)); 8070 PetscCall(PetscFree(recv_req_idxs)); 8071 PetscCall(PetscFree(recv_req_vals)); 8072 PetscCall(PetscFree(recv_req_vecs)); 8073 PetscCall(PetscFree(recv_req_idxs_is)); 8074 PetscCall(PetscFree(send_req_idxs)); 8075 PetscCall(PetscFree(send_req_vals)); 8076 PetscCall(PetscFree(send_req_vecs)); 8077 PetscCall(PetscFree(send_req_idxs_is)); 8078 PetscCall(PetscFree(ilengths_vals)); 8079 PetscCall(PetscFree(ilengths_idxs)); 8080 PetscCall(PetscFree(olengths_vals)); 8081 PetscCall(PetscFree(olengths_idxs)); 8082 PetscCall(PetscFree(onodes)); 8083 if (nis) { 8084 PetscCall(PetscFree(ilengths_idxs_is)); 8085 PetscCall(PetscFree(olengths_idxs_is)); 8086 PetscCall(PetscFree(onodes_is)); 8087 } 8088 PetscCall(PetscSubcommDestroy(&subcomm)); 8089 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 8090 PetscCall(MatDestroy(mat_n)); 8091 for (i=0;i<nis;i++) { 8092 PetscCall(ISDestroy(&isarray[i])); 8093 } 8094 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8095 PetscCall(VecDestroy(&nnsp_vec[0])); 8096 } 8097 *mat_n = NULL; 8098 } 8099 PetscFunctionReturn(0); 8100 } 8101 8102 /* temporary hack into ksp private data structure */ 8103 #include <petsc/private/kspimpl.h> 8104 8105 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8106 { 8107 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8108 PC_IS *pcis = (PC_IS*)pc->data; 8109 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8110 Mat coarsedivudotp = NULL; 8111 Mat coarseG,t_coarse_mat_is; 8112 MatNullSpace CoarseNullSpace = NULL; 8113 ISLocalToGlobalMapping coarse_islg; 8114 IS coarse_is,*isarray,corners; 8115 PetscInt i,im_active=-1,active_procs=-1; 8116 PetscInt nis,nisdofs,nisneu,nisvert; 8117 PetscInt coarse_eqs_per_proc; 8118 PC pc_temp; 8119 PCType coarse_pc_type; 8120 KSPType coarse_ksp_type; 8121 PetscBool multilevel_requested,multilevel_allowed; 8122 PetscBool coarse_reuse; 8123 PetscInt ncoarse,nedcfield; 8124 PetscBool compute_vecs = PETSC_FALSE; 8125 PetscScalar *array; 8126 MatReuse coarse_mat_reuse; 8127 PetscBool restr, full_restr, have_void; 8128 PetscMPIInt size; 8129 PetscErrorCode ierr; 8130 8131 PetscFunctionBegin; 8132 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8133 /* Assign global numbering to coarse dofs */ 8134 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8135 PetscInt ocoarse_size; 8136 compute_vecs = PETSC_TRUE; 8137 8138 pcbddc->new_primal_space = PETSC_TRUE; 8139 ocoarse_size = pcbddc->coarse_size; 8140 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8141 PetscCall(PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices)); 8142 /* see if we can avoid some work */ 8143 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8144 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8145 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8146 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8147 coarse_reuse = PETSC_FALSE; 8148 } else { /* we can safely reuse already computed coarse matrix */ 8149 coarse_reuse = PETSC_TRUE; 8150 } 8151 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8152 coarse_reuse = PETSC_FALSE; 8153 } 8154 /* reset any subassembling information */ 8155 if (!coarse_reuse || pcbddc->recompute_topography) { 8156 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8157 } 8158 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8159 coarse_reuse = PETSC_TRUE; 8160 } 8161 if (coarse_reuse && pcbddc->coarse_ksp) { 8162 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 8163 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8164 coarse_mat_reuse = MAT_REUSE_MATRIX; 8165 } else { 8166 coarse_mat = NULL; 8167 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8168 } 8169 8170 /* creates temporary l2gmap and IS for coarse indexes */ 8171 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is)); 8172 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg)); 8173 8174 /* creates temporary MATIS object for coarse matrix */ 8175 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense)); 8176 PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,coarse_islg,&t_coarse_mat_is)); 8177 PetscCall(MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense)); 8178 PetscCall(MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8179 PetscCall(MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8180 PetscCall(MatDestroy(&coarse_submat_dense)); 8181 8182 /* count "active" (i.e. with positive local size) and "void" processes */ 8183 im_active = !!(pcis->n); 8184 PetscCall(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8185 8186 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8187 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8188 /* full_restr : just use the receivers from the subassembling pattern */ 8189 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size)); 8190 coarse_mat_is = NULL; 8191 multilevel_allowed = PETSC_FALSE; 8192 multilevel_requested = PETSC_FALSE; 8193 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8194 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8195 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8196 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8197 if (multilevel_requested) { 8198 ncoarse = active_procs/pcbddc->coarsening_ratio; 8199 restr = PETSC_FALSE; 8200 full_restr = PETSC_FALSE; 8201 } else { 8202 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8203 restr = PETSC_TRUE; 8204 full_restr = PETSC_TRUE; 8205 } 8206 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8207 ncoarse = PetscMax(1,ncoarse); 8208 if (!pcbddc->coarse_subassembling) { 8209 if (pcbddc->coarsening_ratio > 1) { 8210 if (multilevel_requested) { 8211 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8212 } else { 8213 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8214 } 8215 } else { 8216 PetscMPIInt rank; 8217 8218 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank)); 8219 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8220 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling)); 8221 } 8222 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8223 PetscInt psum; 8224 if (pcbddc->coarse_ksp) psum = 1; 8225 else psum = 0; 8226 PetscCall(MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8227 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8228 } 8229 /* determine if we can go multilevel */ 8230 if (multilevel_requested) { 8231 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8232 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8233 } 8234 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8235 8236 /* dump subassembling pattern */ 8237 if (pcbddc->dbg_flag && multilevel_allowed) { 8238 PetscCall(ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer)); 8239 } 8240 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8241 nedcfield = -1; 8242 corners = NULL; 8243 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8244 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8245 const PetscInt *idxs; 8246 ISLocalToGlobalMapping tmap; 8247 8248 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8249 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap)); 8250 /* allocate space for temporary storage */ 8251 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs)); 8252 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs2)); 8253 /* allocate for IS array */ 8254 nisdofs = pcbddc->n_ISForDofsLocal; 8255 if (pcbddc->nedclocal) { 8256 if (pcbddc->nedfield > -1) { 8257 nedcfield = pcbddc->nedfield; 8258 } else { 8259 nedcfield = 0; 8260 PetscCheck(!nisdofs,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 8261 nisdofs = 1; 8262 } 8263 } 8264 nisneu = !!pcbddc->NeumannBoundariesLocal; 8265 nisvert = 0; /* nisvert is not used */ 8266 nis = nisdofs + nisneu + nisvert; 8267 PetscCall(PetscMalloc1(nis,&isarray)); 8268 /* dofs splitting */ 8269 for (i=0;i<nisdofs;i++) { 8270 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8271 if (nedcfield != i) { 8272 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize)); 8273 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8274 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8275 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8276 } else { 8277 PetscCall(ISGetLocalSize(pcbddc->nedclocal,&tsize)); 8278 PetscCall(ISGetIndices(pcbddc->nedclocal,&idxs)); 8279 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8280 PetscCheckFalse(tsize != nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 8281 PetscCall(ISRestoreIndices(pcbddc->nedclocal,&idxs)); 8282 } 8283 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8284 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i])); 8285 /* PetscCall(ISView(isarray[i],0)); */ 8286 } 8287 /* neumann boundaries */ 8288 if (pcbddc->NeumannBoundariesLocal) { 8289 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8290 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize)); 8291 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8292 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8293 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8294 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8295 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs])); 8296 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8297 } 8298 /* coordinates */ 8299 if (pcbddc->corner_selected) { 8300 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8301 PetscCall(ISGetLocalSize(corners,&tsize)); 8302 PetscCall(ISGetIndices(corners,&idxs)); 8303 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8304 PetscCheckFalse(tsize != nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout); 8305 PetscCall(ISRestoreIndices(corners,&idxs)); 8306 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8307 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8308 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners)); 8309 } 8310 PetscCall(PetscFree(tidxs)); 8311 PetscCall(PetscFree(tidxs2)); 8312 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8313 } else { 8314 nis = 0; 8315 nisdofs = 0; 8316 nisneu = 0; 8317 nisvert = 0; 8318 isarray = NULL; 8319 } 8320 /* destroy no longer needed map */ 8321 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8322 8323 /* subassemble */ 8324 if (multilevel_allowed) { 8325 Vec vp[1]; 8326 PetscInt nvecs = 0; 8327 PetscBool reuse,reuser; 8328 8329 if (coarse_mat) reuse = PETSC_TRUE; 8330 else reuse = PETSC_FALSE; 8331 PetscCall(MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8332 vp[0] = NULL; 8333 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8334 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&vp[0])); 8335 PetscCall(VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE)); 8336 PetscCall(VecSetType(vp[0],VECSTANDARD)); 8337 nvecs = 1; 8338 8339 if (pcbddc->divudotp) { 8340 Mat B,loc_divudotp; 8341 Vec v,p; 8342 IS dummy; 8343 PetscInt np; 8344 8345 PetscCall(MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp)); 8346 PetscCall(MatGetSize(loc_divudotp,&np,NULL)); 8347 PetscCall(ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy)); 8348 PetscCall(MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B)); 8349 PetscCall(MatCreateVecs(B,&v,&p)); 8350 PetscCall(VecSet(p,1.)); 8351 PetscCall(MatMultTranspose(B,p,v)); 8352 PetscCall(VecDestroy(&p)); 8353 PetscCall(MatDestroy(&B)); 8354 PetscCall(VecGetArray(vp[0],&array)); 8355 PetscCall(VecPlaceArray(pcbddc->vec1_P,array)); 8356 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P)); 8357 PetscCall(VecResetArray(pcbddc->vec1_P)); 8358 PetscCall(VecRestoreArray(vp[0],&array)); 8359 PetscCall(ISDestroy(&dummy)); 8360 PetscCall(VecDestroy(&v)); 8361 } 8362 } 8363 if (reuser) { 8364 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp)); 8365 } else { 8366 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp)); 8367 } 8368 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8369 PetscScalar *arraym; 8370 const PetscScalar *arrayv; 8371 PetscInt nl; 8372 PetscCall(VecGetLocalSize(vp[0],&nl)); 8373 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp)); 8374 PetscCall(MatDenseGetArray(coarsedivudotp,&arraym)); 8375 PetscCall(VecGetArrayRead(vp[0],&arrayv)); 8376 PetscCall(PetscArraycpy(arraym,arrayv,nl)); 8377 PetscCall(VecRestoreArrayRead(vp[0],&arrayv)); 8378 PetscCall(MatDenseRestoreArray(coarsedivudotp,&arraym)); 8379 PetscCall(VecDestroy(&vp[0])); 8380 } else { 8381 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp)); 8382 } 8383 } else { 8384 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL)); 8385 } 8386 if (coarse_mat_is || coarse_mat) { 8387 if (!multilevel_allowed) { 8388 PetscCall(MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat)); 8389 } else { 8390 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8391 if (coarse_mat_is) { 8392 PetscCheck(!coarse_mat,PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8393 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8394 coarse_mat = coarse_mat_is; 8395 } 8396 } 8397 } 8398 PetscCall(MatDestroy(&t_coarse_mat_is)); 8399 PetscCall(MatDestroy(&coarse_mat_is)); 8400 8401 /* create local to global scatters for coarse problem */ 8402 if (compute_vecs) { 8403 PetscInt lrows; 8404 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8405 if (coarse_mat) { 8406 PetscCall(MatGetLocalSize(coarse_mat,&lrows,NULL)); 8407 } else { 8408 lrows = 0; 8409 } 8410 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec)); 8411 PetscCall(VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE)); 8412 PetscCall(VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8413 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8414 PetscCall(VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob)); 8415 } 8416 PetscCall(ISDestroy(&coarse_is)); 8417 8418 /* set defaults for coarse KSP and PC */ 8419 if (multilevel_allowed) { 8420 coarse_ksp_type = KSPRICHARDSON; 8421 coarse_pc_type = PCBDDC; 8422 } else { 8423 coarse_ksp_type = KSPPREONLY; 8424 coarse_pc_type = PCREDUNDANT; 8425 } 8426 8427 /* print some info if requested */ 8428 if (pcbddc->dbg_flag) { 8429 if (!multilevel_allowed) { 8430 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8431 if (multilevel_requested) { 8432 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio)); 8433 } else if (pcbddc->max_levels) { 8434 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels)); 8435 } 8436 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8437 } 8438 } 8439 8440 /* communicate coarse discrete gradient */ 8441 coarseG = NULL; 8442 if (pcbddc->nedcG && multilevel_allowed) { 8443 MPI_Comm ccomm; 8444 if (coarse_mat) { 8445 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8446 } else { 8447 ccomm = MPI_COMM_NULL; 8448 } 8449 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG)); 8450 } 8451 8452 /* create the coarse KSP object only once with defaults */ 8453 if (coarse_mat) { 8454 PetscBool isredundant,isbddc,force,valid; 8455 PetscViewer dbg_viewer = NULL; 8456 8457 if (pcbddc->dbg_flag) { 8458 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8459 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level)); 8460 } 8461 if (!pcbddc->coarse_ksp) { 8462 char prefix[256],str_level[16]; 8463 size_t len; 8464 8465 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp)); 8466 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure)); 8467 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1)); 8468 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1)); 8469 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8470 PetscCall(KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type)); 8471 PetscCall(KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE)); 8472 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8473 /* TODO is this logic correct? should check for coarse_mat type */ 8474 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8475 /* prefix */ 8476 PetscCall(PetscStrcpy(prefix,"")); 8477 PetscCall(PetscStrcpy(str_level,"")); 8478 if (!pcbddc->current_level) { 8479 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix))); 8480 PetscCall(PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix))); 8481 } else { 8482 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 8483 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8484 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8485 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8486 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1)); 8487 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 8488 PetscCall(PetscStrlcat(prefix,str_level,sizeof(prefix))); 8489 } 8490 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix)); 8491 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8492 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8493 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8494 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8495 /* allow user customization */ 8496 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8497 /* get some info after set from options */ 8498 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8499 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8500 force = PETSC_FALSE; 8501 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8502 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8503 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8504 if (multilevel_allowed && !force && !valid) { 8505 isbddc = PETSC_TRUE; 8506 PetscCall(PCSetType(pc_temp,PCBDDC)); 8507 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8508 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8509 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8510 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8511 ierr = PetscObjectOptionsBegin((PetscObject)pc_temp);PetscCall(ierr); 8512 PetscCall((*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp)); 8513 PetscCall(PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp)); 8514 ierr = PetscOptionsEnd();PetscCall(ierr); 8515 pc_temp->setfromoptionscalled++; 8516 } 8517 } 8518 } 8519 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8520 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8521 if (nisdofs) { 8522 PetscCall(PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray)); 8523 for (i=0;i<nisdofs;i++) { 8524 PetscCall(ISDestroy(&isarray[i])); 8525 } 8526 } 8527 if (nisneu) { 8528 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs])); 8529 PetscCall(ISDestroy(&isarray[nisdofs])); 8530 } 8531 if (nisvert) { 8532 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1])); 8533 PetscCall(ISDestroy(&isarray[nis-1])); 8534 } 8535 if (coarseG) { 8536 PetscCall(PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE)); 8537 } 8538 8539 /* get some info after set from options */ 8540 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8541 8542 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8543 if (isbddc && !multilevel_allowed) { 8544 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8545 } 8546 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8547 force = PETSC_FALSE; 8548 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8549 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8550 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8551 PetscCall(PCSetType(pc_temp,PCBDDC)); 8552 } 8553 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant)); 8554 if (isredundant) { 8555 KSP inner_ksp; 8556 PC inner_pc; 8557 8558 PetscCall(PCRedundantGetKSP(pc_temp,&inner_ksp)); 8559 PetscCall(KSPGetPC(inner_ksp,&inner_pc)); 8560 } 8561 8562 /* parameters which miss an API */ 8563 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8564 if (isbddc) { 8565 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8566 8567 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8568 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8569 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8570 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8571 if (pcbddc_coarse->benign_saddle_point) { 8572 Mat coarsedivudotp_is; 8573 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8574 IS row,col; 8575 const PetscInt *gidxs; 8576 PetscInt n,st,M,N; 8577 8578 PetscCall(MatGetSize(coarsedivudotp,&n,NULL)); 8579 PetscCallMPI(MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat))); 8580 st = st-n; 8581 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row)); 8582 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL)); 8583 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap,&n)); 8584 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 8585 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col)); 8586 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 8587 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 8588 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 8589 PetscCall(ISGetSize(row,&M)); 8590 PetscCall(MatGetSize(coarse_mat,&N,NULL)); 8591 PetscCall(ISDestroy(&row)); 8592 PetscCall(ISDestroy(&col)); 8593 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is)); 8594 PetscCall(MatSetType(coarsedivudotp_is,MATIS)); 8595 PetscCall(MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N)); 8596 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g)); 8597 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8598 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8599 PetscCall(MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp)); 8600 PetscCall(MatDestroy(&coarsedivudotp)); 8601 PetscCall(PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL)); 8602 PetscCall(MatDestroy(&coarsedivudotp_is)); 8603 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8604 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8605 } 8606 } 8607 8608 /* propagate symmetry info of coarse matrix */ 8609 PetscCall(MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE)); 8610 if (pc->pmat->symmetric_set) { 8611 PetscCall(MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric)); 8612 } 8613 if (pc->pmat->hermitian_set) { 8614 PetscCall(MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian)); 8615 } 8616 if (pc->pmat->spd_set) { 8617 PetscCall(MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd)); 8618 } 8619 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8620 PetscCall(MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE)); 8621 } 8622 /* set operators */ 8623 PetscCall(MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view")); 8624 PetscCall(MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix)); 8625 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8626 if (pcbddc->dbg_flag) { 8627 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level)); 8628 } 8629 } 8630 PetscCall(MatDestroy(&coarseG)); 8631 PetscCall(PetscFree(isarray)); 8632 #if 0 8633 { 8634 PetscViewer viewer; 8635 char filename[256]; 8636 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8637 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8638 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8639 PetscCall(MatView(coarse_mat,viewer)); 8640 PetscCall(PetscViewerPopFormat(viewer)); 8641 PetscCall(PetscViewerDestroy(&viewer)); 8642 } 8643 #endif 8644 8645 if (corners) { 8646 Vec gv; 8647 IS is; 8648 const PetscInt *idxs; 8649 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8650 PetscScalar *coords; 8651 8652 PetscCheck(pcbddc->mat_graph->cloc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8653 PetscCall(VecGetSize(pcbddc->coarse_vec,&N)); 8654 PetscCall(VecGetLocalSize(pcbddc->coarse_vec,&n)); 8655 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv)); 8656 PetscCall(VecSetBlockSize(gv,cdim)); 8657 PetscCall(VecSetSizes(gv,n*cdim,N*cdim)); 8658 PetscCall(VecSetType(gv,VECSTANDARD)); 8659 PetscCall(VecSetFromOptions(gv)); 8660 PetscCall(VecSet(gv,PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8661 8662 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8663 PetscCall(ISGetLocalSize(is,&n)); 8664 PetscCall(ISGetIndices(is,&idxs)); 8665 PetscCall(PetscMalloc1(n*cdim,&coords)); 8666 for (i=0;i<n;i++) { 8667 for (d=0;d<cdim;d++) { 8668 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8669 } 8670 } 8671 PetscCall(ISRestoreIndices(is,&idxs)); 8672 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8673 8674 PetscCall(ISGetLocalSize(corners,&n)); 8675 PetscCall(ISGetIndices(corners,&idxs)); 8676 PetscCall(VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES)); 8677 PetscCall(ISRestoreIndices(corners,&idxs)); 8678 PetscCall(PetscFree(coords)); 8679 PetscCall(VecAssemblyBegin(gv)); 8680 PetscCall(VecAssemblyEnd(gv)); 8681 PetscCall(VecGetArray(gv,&coords)); 8682 if (pcbddc->coarse_ksp) { 8683 PC coarse_pc; 8684 PetscBool isbddc; 8685 8686 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 8687 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 8688 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8689 PetscReal *realcoords; 8690 8691 PetscCall(VecGetLocalSize(gv,&n)); 8692 #if defined(PETSC_USE_COMPLEX) 8693 PetscCall(PetscMalloc1(n,&realcoords)); 8694 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8695 #else 8696 realcoords = coords; 8697 #endif 8698 PetscCall(PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords)); 8699 #if defined(PETSC_USE_COMPLEX) 8700 PetscCall(PetscFree(realcoords)); 8701 #endif 8702 } 8703 } 8704 PetscCall(VecRestoreArray(gv,&coords)); 8705 PetscCall(VecDestroy(&gv)); 8706 } 8707 PetscCall(ISDestroy(&corners)); 8708 8709 if (pcbddc->coarse_ksp) { 8710 Vec crhs,csol; 8711 8712 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&csol)); 8713 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&crhs)); 8714 if (!csol) { 8715 PetscCall(MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL)); 8716 } 8717 if (!crhs) { 8718 PetscCall(MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs))); 8719 } 8720 } 8721 PetscCall(MatDestroy(&coarsedivudotp)); 8722 8723 /* compute null space for coarse solver if the benign trick has been requested */ 8724 if (pcbddc->benign_null) { 8725 8726 PetscCall(VecSet(pcbddc->vec1_P,0.)); 8727 for (i=0;i<pcbddc->benign_n;i++) { 8728 PetscCall(VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES)); 8729 } 8730 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8731 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8732 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8733 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8734 if (coarse_mat) { 8735 Vec nullv; 8736 PetscScalar *array,*array2; 8737 PetscInt nl; 8738 8739 PetscCall(MatCreateVecs(coarse_mat,&nullv,NULL)); 8740 PetscCall(VecGetLocalSize(nullv,&nl)); 8741 PetscCall(VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8742 PetscCall(VecGetArray(nullv,&array2)); 8743 PetscCall(PetscArraycpy(array2,array,nl)); 8744 PetscCall(VecRestoreArray(nullv,&array2)); 8745 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8746 PetscCall(VecNormalize(nullv,NULL)); 8747 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace)); 8748 PetscCall(VecDestroy(&nullv)); 8749 } 8750 } 8751 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8752 8753 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8754 if (pcbddc->coarse_ksp) { 8755 PetscBool ispreonly; 8756 8757 if (CoarseNullSpace) { 8758 PetscBool isnull; 8759 PetscCall(MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull)); 8760 if (isnull) { 8761 PetscCall(MatSetNullSpace(coarse_mat,CoarseNullSpace)); 8762 } 8763 /* TODO: add local nullspaces (if any) */ 8764 } 8765 /* setup coarse ksp */ 8766 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8767 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8768 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly)); 8769 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8770 KSP check_ksp; 8771 KSPType check_ksp_type; 8772 PC check_pc; 8773 Vec check_vec,coarse_vec; 8774 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8775 PetscInt its; 8776 PetscBool compute_eigs; 8777 PetscReal *eigs_r,*eigs_c; 8778 PetscInt neigs; 8779 const char *prefix; 8780 8781 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8782 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp)); 8783 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0)); 8784 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE)); 8785 PetscCall(KSPSetOperators(check_ksp,coarse_mat,coarse_mat)); 8786 PetscCall(KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size)); 8787 /* prevent from setup unneeded object */ 8788 PetscCall(KSPGetPC(check_ksp,&check_pc)); 8789 PetscCall(PCSetType(check_pc,PCNONE)); 8790 if (ispreonly) { 8791 check_ksp_type = KSPPREONLY; 8792 compute_eigs = PETSC_FALSE; 8793 } else { 8794 check_ksp_type = KSPGMRES; 8795 compute_eigs = PETSC_TRUE; 8796 } 8797 PetscCall(KSPSetType(check_ksp,check_ksp_type)); 8798 PetscCall(KSPSetComputeSingularValues(check_ksp,compute_eigs)); 8799 PetscCall(KSPSetComputeEigenvalues(check_ksp,compute_eigs)); 8800 PetscCall(KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1)); 8801 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix)); 8802 PetscCall(KSPSetOptionsPrefix(check_ksp,prefix)); 8803 PetscCall(KSPAppendOptionsPrefix(check_ksp,"check_")); 8804 PetscCall(KSPSetFromOptions(check_ksp)); 8805 PetscCall(KSPSetUp(check_ksp)); 8806 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&check_pc)); 8807 PetscCall(KSPSetPC(check_ksp,check_pc)); 8808 /* create random vec */ 8809 PetscCall(MatCreateVecs(coarse_mat,&coarse_vec,&check_vec)); 8810 PetscCall(VecSetRandom(check_vec,NULL)); 8811 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8812 /* solve coarse problem */ 8813 PetscCall(KSPSolve(check_ksp,coarse_vec,coarse_vec)); 8814 PetscCall(KSPCheckSolve(check_ksp,pc,coarse_vec)); 8815 /* set eigenvalue estimation if preonly has not been requested */ 8816 if (compute_eigs) { 8817 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_r)); 8818 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_c)); 8819 PetscCall(KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs)); 8820 if (neigs) { 8821 lambda_max = eigs_r[neigs-1]; 8822 lambda_min = eigs_r[0]; 8823 if (pcbddc->use_coarse_estimates) { 8824 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8825 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min)); 8826 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min))); 8827 } 8828 } 8829 } 8830 } 8831 8832 /* check coarse problem residual error */ 8833 if (pcbddc->dbg_flag) { 8834 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8835 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1))); 8836 PetscCall(VecAXPY(check_vec,-1.0,coarse_vec)); 8837 PetscCall(VecNorm(check_vec,NORM_INFINITY,&infty_error)); 8838 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8839 PetscCall(VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error)); 8840 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates)); 8841 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer)); 8842 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer)); 8843 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error)); 8844 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error)); 8845 if (CoarseNullSpace) { 8846 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n")); 8847 } 8848 if (compute_eigs) { 8849 PetscReal lambda_max_s,lambda_min_s; 8850 KSPConvergedReason reason; 8851 PetscCall(KSPGetType(check_ksp,&check_ksp_type)); 8852 PetscCall(KSPGetIterationNumber(check_ksp,&its)); 8853 PetscCall(KSPGetConvergedReason(check_ksp,&reason)); 8854 PetscCall(KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s)); 8855 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s)); 8856 for (i=0;i<neigs;i++) { 8857 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i])); 8858 } 8859 } 8860 PetscCall(PetscViewerFlush(dbg_viewer)); 8861 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1))); 8862 } 8863 PetscCall(VecDestroy(&check_vec)); 8864 PetscCall(VecDestroy(&coarse_vec)); 8865 PetscCall(KSPDestroy(&check_ksp)); 8866 if (compute_eigs) { 8867 PetscCall(PetscFree(eigs_r)); 8868 PetscCall(PetscFree(eigs_c)); 8869 } 8870 } 8871 } 8872 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 8873 /* print additional info */ 8874 if (pcbddc->dbg_flag) { 8875 /* waits until all processes reaches this point */ 8876 PetscCall(PetscBarrier((PetscObject)pc)); 8877 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level)); 8878 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8879 } 8880 8881 /* free memory */ 8882 PetscCall(MatDestroy(&coarse_mat)); 8883 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8884 PetscFunctionReturn(0); 8885 } 8886 8887 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8888 { 8889 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8890 PC_IS* pcis = (PC_IS*)pc->data; 8891 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8892 IS subset,subset_mult,subset_n; 8893 PetscInt local_size,coarse_size=0; 8894 PetscInt *local_primal_indices=NULL; 8895 const PetscInt *t_local_primal_indices; 8896 8897 PetscFunctionBegin; 8898 /* Compute global number of coarse dofs */ 8899 PetscCheckFalse(pcbddc->local_primal_size && !pcbddc->local_primal_ref_node,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8900 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n)); 8901 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset)); 8902 PetscCall(ISDestroy(&subset_n)); 8903 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult)); 8904 PetscCall(ISRenumber(subset,subset_mult,&coarse_size,&subset_n)); 8905 PetscCall(ISDestroy(&subset)); 8906 PetscCall(ISDestroy(&subset_mult)); 8907 PetscCall(ISGetLocalSize(subset_n,&local_size)); 8908 PetscCheckFalse(local_size != pcbddc->local_primal_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8909 PetscCall(PetscMalloc1(local_size,&local_primal_indices)); 8910 PetscCall(ISGetIndices(subset_n,&t_local_primal_indices)); 8911 PetscCall(PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size)); 8912 PetscCall(ISRestoreIndices(subset_n,&t_local_primal_indices)); 8913 PetscCall(ISDestroy(&subset_n)); 8914 8915 /* check numbering */ 8916 if (pcbddc->dbg_flag) { 8917 PetscScalar coarsesum,*array,*array2; 8918 PetscInt i; 8919 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8920 8921 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8922 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8923 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n")); 8924 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8925 /* counter */ 8926 PetscCall(VecSet(pcis->vec1_global,0.0)); 8927 PetscCall(VecSet(pcis->vec1_N,1.0)); 8928 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8929 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8930 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8931 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8932 PetscCall(VecSet(pcis->vec1_N,0.0)); 8933 for (i=0;i<pcbddc->local_primal_size;i++) { 8934 PetscCall(VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES)); 8935 } 8936 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8937 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8938 PetscCall(VecSet(pcis->vec1_global,0.0)); 8939 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8940 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8941 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8942 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8943 PetscCall(VecGetArray(pcis->vec1_N,&array)); 8944 PetscCall(VecGetArray(pcis->vec2_N,&array2)); 8945 for (i=0;i<pcis->n;i++) { 8946 if (array[i] != 0.0 && array[i] != array2[i]) { 8947 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8948 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8949 set_error = PETSC_TRUE; 8950 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi)); 8951 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh)); 8952 } 8953 } 8954 PetscCall(VecRestoreArray(pcis->vec2_N,&array2)); 8955 PetscCall(MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8956 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8957 for (i=0;i<pcis->n;i++) { 8958 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8959 } 8960 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 8961 PetscCall(VecSet(pcis->vec1_global,0.0)); 8962 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8963 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8964 PetscCall(VecSum(pcis->vec1_global,&coarsesum)); 8965 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum))); 8966 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8967 PetscInt *gidxs; 8968 8969 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&gidxs)); 8970 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs)); 8971 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n")); 8972 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8973 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank)); 8974 for (i=0;i<pcbddc->local_primal_size;i++) { 8975 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i])); 8976 } 8977 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8978 PetscCall(PetscFree(gidxs)); 8979 } 8980 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8981 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8982 PetscCheck(!set_error_reduced,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8983 } 8984 8985 /* get back data */ 8986 *coarse_size_n = coarse_size; 8987 *local_primal_indices_n = local_primal_indices; 8988 PetscFunctionReturn(0); 8989 } 8990 8991 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8992 { 8993 IS localis_t; 8994 PetscInt i,lsize,*idxs,n; 8995 PetscScalar *vals; 8996 8997 PetscFunctionBegin; 8998 /* get indices in local ordering exploiting local to global map */ 8999 PetscCall(ISGetLocalSize(globalis,&lsize)); 9000 PetscCall(PetscMalloc1(lsize,&vals)); 9001 for (i=0;i<lsize;i++) vals[i] = 1.0; 9002 PetscCall(ISGetIndices(globalis,(const PetscInt**)&idxs)); 9003 PetscCall(VecSet(gwork,0.0)); 9004 PetscCall(VecSet(lwork,0.0)); 9005 if (idxs) { /* multilevel guard */ 9006 PetscCall(VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE)); 9007 PetscCall(VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES)); 9008 } 9009 PetscCall(VecAssemblyBegin(gwork)); 9010 PetscCall(ISRestoreIndices(globalis,(const PetscInt**)&idxs)); 9011 PetscCall(PetscFree(vals)); 9012 PetscCall(VecAssemblyEnd(gwork)); 9013 /* now compute set in local ordering */ 9014 PetscCall(VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 9015 PetscCall(VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 9016 PetscCall(VecGetArrayRead(lwork,(const PetscScalar**)&vals)); 9017 PetscCall(VecGetSize(lwork,&n)); 9018 for (i=0,lsize=0;i<n;i++) { 9019 if (PetscRealPart(vals[i]) > 0.5) { 9020 lsize++; 9021 } 9022 } 9023 PetscCall(PetscMalloc1(lsize,&idxs)); 9024 for (i=0,lsize=0;i<n;i++) { 9025 if (PetscRealPart(vals[i]) > 0.5) { 9026 idxs[lsize++] = i; 9027 } 9028 } 9029 PetscCall(VecRestoreArrayRead(lwork,(const PetscScalar**)&vals)); 9030 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t)); 9031 *localis = localis_t; 9032 PetscFunctionReturn(0); 9033 } 9034 9035 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9036 { 9037 PC_IS *pcis=(PC_IS*)pc->data; 9038 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9039 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9040 Mat S_j; 9041 PetscInt *used_xadj,*used_adjncy; 9042 PetscBool free_used_adj; 9043 PetscErrorCode ierr; 9044 9045 PetscFunctionBegin; 9046 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9047 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9048 free_used_adj = PETSC_FALSE; 9049 if (pcbddc->sub_schurs_layers == -1) { 9050 used_xadj = NULL; 9051 used_adjncy = NULL; 9052 } else { 9053 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9054 used_xadj = pcbddc->mat_graph->xadj; 9055 used_adjncy = pcbddc->mat_graph->adjncy; 9056 } else if (pcbddc->computed_rowadj) { 9057 used_xadj = pcbddc->mat_graph->xadj; 9058 used_adjncy = pcbddc->mat_graph->adjncy; 9059 } else { 9060 PetscBool flg_row=PETSC_FALSE; 9061 const PetscInt *xadj,*adjncy; 9062 PetscInt nvtxs; 9063 9064 PetscCall(MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9065 if (flg_row) { 9066 PetscCall(PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy)); 9067 PetscCall(PetscArraycpy(used_xadj,xadj,nvtxs+1)); 9068 PetscCall(PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs])); 9069 free_used_adj = PETSC_TRUE; 9070 } else { 9071 pcbddc->sub_schurs_layers = -1; 9072 used_xadj = NULL; 9073 used_adjncy = NULL; 9074 } 9075 PetscCall(MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9076 } 9077 } 9078 9079 /* setup sub_schurs data */ 9080 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9081 if (!sub_schurs->schur_explicit) { 9082 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9083 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9084 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL)); 9085 } else { 9086 Mat change = NULL; 9087 Vec scaling = NULL; 9088 IS change_primal = NULL, iP; 9089 PetscInt benign_n; 9090 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9091 PetscBool need_change = PETSC_FALSE; 9092 PetscBool discrete_harmonic = PETSC_FALSE; 9093 9094 if (!pcbddc->use_vertices && reuse_solvers) { 9095 PetscInt n_vertices; 9096 9097 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 9098 reuse_solvers = (PetscBool)!n_vertices; 9099 } 9100 if (!pcbddc->benign_change_explicit) { 9101 benign_n = pcbddc->benign_n; 9102 } else { 9103 benign_n = 0; 9104 } 9105 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9106 We need a global reduction to avoid possible deadlocks. 9107 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9108 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9109 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9110 PetscCall(MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 9111 need_change = (PetscBool)(!need_change); 9112 } 9113 /* If the user defines additional constraints, we import them here. 9114 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 9115 if (need_change) { 9116 PC_IS *pcisf; 9117 PC_BDDC *pcbddcf; 9118 PC pcf; 9119 9120 PetscCheck(!pcbddc->sub_schurs_rebuild,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9121 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc),&pcf)); 9122 PetscCall(PCSetOperators(pcf,pc->mat,pc->pmat)); 9123 PetscCall(PCSetType(pcf,PCBDDC)); 9124 9125 /* hacks */ 9126 pcisf = (PC_IS*)pcf->data; 9127 pcisf->is_B_local = pcis->is_B_local; 9128 pcisf->vec1_N = pcis->vec1_N; 9129 pcisf->BtoNmap = pcis->BtoNmap; 9130 pcisf->n = pcis->n; 9131 pcisf->n_B = pcis->n_B; 9132 pcbddcf = (PC_BDDC*)pcf->data; 9133 PetscCall(PetscFree(pcbddcf->mat_graph)); 9134 pcbddcf->mat_graph = pcbddc->mat_graph; 9135 pcbddcf->use_faces = PETSC_TRUE; 9136 pcbddcf->use_change_of_basis = PETSC_TRUE; 9137 pcbddcf->use_change_on_faces = PETSC_TRUE; 9138 pcbddcf->use_qr_single = PETSC_TRUE; 9139 pcbddcf->fake_change = PETSC_TRUE; 9140 9141 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 9142 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9143 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 9144 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal)); 9145 change = pcbddcf->ConstraintMatrix; 9146 pcbddcf->ConstraintMatrix = NULL; 9147 9148 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 9149 PetscCall(PetscFree(pcbddcf->sub_schurs)); 9150 PetscCall(MatNullSpaceDestroy(&pcbddcf->onearnullspace)); 9151 PetscCall(PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult)); 9152 PetscCall(PetscFree(pcbddcf->primal_indices_local_idxs)); 9153 PetscCall(PetscFree(pcbddcf->onearnullvecs_state)); 9154 PetscCall(PetscFree(pcf->data)); 9155 pcf->ops->destroy = NULL; 9156 pcf->ops->reset = NULL; 9157 PetscCall(PCDestroy(&pcf)); 9158 } 9159 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9160 9161 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP)); 9162 if (iP) { 9163 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");PetscCall(ierr); 9164 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL)); 9165 ierr = PetscOptionsEnd();PetscCall(ierr); 9166 } 9167 if (discrete_harmonic) { 9168 Mat A; 9169 PetscCall(MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A)); 9170 PetscCall(MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL)); 9171 PetscCall(PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP)); 9172 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9173 PetscCall(MatDestroy(&A)); 9174 } else { 9175 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9176 } 9177 PetscCall(MatDestroy(&change)); 9178 PetscCall(ISDestroy(&change_primal)); 9179 } 9180 PetscCall(MatDestroy(&S_j)); 9181 9182 /* free adjacency */ 9183 if (free_used_adj) { 9184 PetscCall(PetscFree2(used_xadj,used_adjncy)); 9185 } 9186 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9187 PetscFunctionReturn(0); 9188 } 9189 9190 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9191 { 9192 PC_IS *pcis=(PC_IS*)pc->data; 9193 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9194 PCBDDCGraph graph; 9195 9196 PetscFunctionBegin; 9197 /* attach interface graph for determining subsets */ 9198 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9199 IS verticesIS,verticescomm; 9200 PetscInt vsize,*idxs; 9201 9202 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9203 PetscCall(ISGetSize(verticesIS,&vsize)); 9204 PetscCall(ISGetIndices(verticesIS,(const PetscInt**)&idxs)); 9205 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm)); 9206 PetscCall(ISRestoreIndices(verticesIS,(const PetscInt**)&idxs)); 9207 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9208 PetscCall(PCBDDCGraphCreate(&graph)); 9209 PetscCall(PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount)); 9210 PetscCall(PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm)); 9211 PetscCall(ISDestroy(&verticescomm)); 9212 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9213 } else { 9214 graph = pcbddc->mat_graph; 9215 } 9216 /* print some info */ 9217 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9218 IS vertices; 9219 PetscInt nv,nedges,nfaces; 9220 PetscCall(PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 9221 PetscCall(PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9222 PetscCall(ISGetSize(vertices,&nv)); 9223 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9224 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 9225 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices)); 9226 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges)); 9227 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces)); 9228 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9229 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9230 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9231 } 9232 9233 /* sub_schurs init */ 9234 if (!pcbddc->sub_schurs) { 9235 PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9236 } 9237 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild)); 9238 9239 /* free graph struct */ 9240 if (pcbddc->sub_schurs_rebuild) { 9241 PetscCall(PCBDDCGraphDestroy(&graph)); 9242 } 9243 PetscFunctionReturn(0); 9244 } 9245 9246 PetscErrorCode PCBDDCCheckOperator(PC pc) 9247 { 9248 PC_IS *pcis=(PC_IS*)pc->data; 9249 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9250 9251 PetscFunctionBegin; 9252 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9253 IS zerodiag = NULL; 9254 Mat S_j,B0_B=NULL; 9255 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9256 PetscScalar *p0_check,*array,*array2; 9257 PetscReal norm; 9258 PetscInt i; 9259 9260 /* B0 and B0_B */ 9261 if (zerodiag) { 9262 IS dummy; 9263 9264 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy)); 9265 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 9266 PetscCall(MatCreateVecs(B0_B,NULL,&dummy_vec)); 9267 PetscCall(ISDestroy(&dummy)); 9268 } 9269 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9270 PetscCall(VecDuplicate(pcbddc->vec1_P,&vec_scale_P)); 9271 PetscCall(VecSet(pcbddc->vec1_P,1.0)); 9272 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9273 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9274 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9275 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9276 PetscCall(VecReciprocal(vec_scale_P)); 9277 /* S_j */ 9278 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9279 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9280 9281 /* mimic vector in \widetilde{W}_\Gamma */ 9282 PetscCall(VecSetRandom(pcis->vec1_N,NULL)); 9283 /* continuous in primal space */ 9284 PetscCall(VecSetRandom(pcbddc->coarse_vec,NULL)); 9285 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9286 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9287 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9288 PetscCall(PetscCalloc1(pcbddc->benign_n,&p0_check)); 9289 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9290 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9291 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9292 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9293 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9294 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9295 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9296 PetscCall(VecDuplicate(pcis->vec2_B,&vec_check_B)); 9297 PetscCall(VecCopy(pcis->vec2_B,vec_check_B)); 9298 9299 /* assemble rhs for coarse problem */ 9300 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9301 /* local with Schur */ 9302 PetscCall(MatMult(S_j,pcis->vec2_B,pcis->vec1_B)); 9303 if (zerodiag) { 9304 PetscCall(VecGetArray(dummy_vec,&array)); 9305 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9306 PetscCall(VecRestoreArray(dummy_vec,&array)); 9307 PetscCall(MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B)); 9308 } 9309 /* sum on primal nodes the local contributions */ 9310 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9311 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9312 PetscCall(VecGetArray(pcis->vec1_N,&array)); 9313 PetscCall(VecGetArray(pcbddc->vec1_P,&array2)); 9314 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9315 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array2)); 9316 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 9317 PetscCall(VecSet(pcbddc->coarse_vec,0.)); 9318 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9319 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9320 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9321 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9322 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9323 /* scale primal nodes (BDDC sums contibutions) */ 9324 PetscCall(VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P)); 9325 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9326 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9327 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9328 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9329 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9330 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9331 /* global: \widetilde{B0}_B w_\Gamma */ 9332 if (zerodiag) { 9333 PetscCall(MatMult(B0_B,pcis->vec2_B,dummy_vec)); 9334 PetscCall(VecGetArray(dummy_vec,&array)); 9335 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9336 PetscCall(VecRestoreArray(dummy_vec,&array)); 9337 } 9338 /* BDDC */ 9339 PetscCall(VecSet(pcis->vec1_D,0.)); 9340 PetscCall(PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE)); 9341 9342 PetscCall(VecCopy(pcis->vec1_B,pcis->vec2_B)); 9343 PetscCall(VecAXPY(pcis->vec1_B,-1.0,vec_check_B)); 9344 PetscCall(VecNorm(pcis->vec1_B,NORM_INFINITY,&norm)); 9345 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm)); 9346 for (i=0;i<pcbddc->benign_n;i++) { 9347 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]))); 9348 } 9349 PetscCall(PetscFree(p0_check)); 9350 PetscCall(VecDestroy(&vec_scale_P)); 9351 PetscCall(VecDestroy(&vec_check_B)); 9352 PetscCall(VecDestroy(&dummy_vec)); 9353 PetscCall(MatDestroy(&S_j)); 9354 PetscCall(MatDestroy(&B0_B)); 9355 } 9356 PetscFunctionReturn(0); 9357 } 9358 9359 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9360 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9361 { 9362 Mat At; 9363 IS rows; 9364 PetscInt rst,ren; 9365 PetscLayout rmap; 9366 9367 PetscFunctionBegin; 9368 rst = ren = 0; 9369 if (ccomm != MPI_COMM_NULL) { 9370 PetscCall(PetscLayoutCreate(ccomm,&rmap)); 9371 PetscCall(PetscLayoutSetSize(rmap,A->rmap->N)); 9372 PetscCall(PetscLayoutSetBlockSize(rmap,1)); 9373 PetscCall(PetscLayoutSetUp(rmap)); 9374 PetscCall(PetscLayoutGetRange(rmap,&rst,&ren)); 9375 } 9376 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows)); 9377 PetscCall(MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At)); 9378 PetscCall(ISDestroy(&rows)); 9379 9380 if (ccomm != MPI_COMM_NULL) { 9381 Mat_MPIAIJ *a,*b; 9382 IS from,to; 9383 Vec gvec; 9384 PetscInt lsize; 9385 9386 PetscCall(MatCreate(ccomm,B)); 9387 PetscCall(MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N)); 9388 PetscCall(MatSetType(*B,MATAIJ)); 9389 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 9390 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9391 a = (Mat_MPIAIJ*)At->data; 9392 b = (Mat_MPIAIJ*)(*B)->data; 9393 PetscCallMPI(MPI_Comm_size(ccomm,&b->size)); 9394 PetscCallMPI(MPI_Comm_rank(ccomm,&b->rank)); 9395 PetscCall(PetscObjectReference((PetscObject)a->A)); 9396 PetscCall(PetscObjectReference((PetscObject)a->B)); 9397 b->A = a->A; 9398 b->B = a->B; 9399 9400 b->donotstash = a->donotstash; 9401 b->roworiented = a->roworiented; 9402 b->rowindices = NULL; 9403 b->rowvalues = NULL; 9404 b->getrowactive = PETSC_FALSE; 9405 9406 (*B)->rmap = rmap; 9407 (*B)->factortype = A->factortype; 9408 (*B)->assembled = PETSC_TRUE; 9409 (*B)->insertmode = NOT_SET_VALUES; 9410 (*B)->preallocated = PETSC_TRUE; 9411 9412 if (a->colmap) { 9413 #if defined(PETSC_USE_CTABLE) 9414 PetscCall(PetscTableCreateCopy(a->colmap,&b->colmap)); 9415 #else 9416 PetscCall(PetscMalloc1(At->cmap->N,&b->colmap)); 9417 PetscCall(PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt))); 9418 PetscCall(PetscArraycpy(b->colmap,a->colmap,At->cmap->N)); 9419 #endif 9420 } else b->colmap = NULL; 9421 if (a->garray) { 9422 PetscInt len; 9423 len = a->B->cmap->n; 9424 PetscCall(PetscMalloc1(len+1,&b->garray)); 9425 PetscCall(PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt))); 9426 if (len) PetscCall(PetscArraycpy(b->garray,a->garray,len)); 9427 } else b->garray = NULL; 9428 9429 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9430 b->lvec = a->lvec; 9431 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec)); 9432 9433 /* cannot use VecScatterCopy */ 9434 PetscCall(VecGetLocalSize(b->lvec,&lsize)); 9435 PetscCall(ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from)); 9436 PetscCall(ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to)); 9437 PetscCall(MatCreateVecs(*B,&gvec,NULL)); 9438 PetscCall(VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx)); 9439 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx)); 9440 PetscCall(ISDestroy(&from)); 9441 PetscCall(ISDestroy(&to)); 9442 PetscCall(VecDestroy(&gvec)); 9443 } 9444 PetscCall(MatDestroy(&At)); 9445 PetscFunctionReturn(0); 9446 } 9447