1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 PetscErrorCode ierr; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 28 if (!nr || !nc) PetscFunctionReturn(0); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 33 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr,nc); 39 if (!rwork) { 40 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 50 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 51 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 #else 55 ierr = PetscMalloc1(5*n,&rwork2);CHKERRQ(ierr); 56 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr)); 57 ierr = PetscFree(rwork2);CHKERRQ(ierr); 58 #endif 59 ierr = PetscFPTrapPop();CHKERRQ(ierr); 60 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 61 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 62 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 63 if (!rwork) { 64 ierr = PetscFree(sing);CHKERRQ(ierr); 65 } 66 if (!work) { 67 ierr = PetscFree(uwork);CHKERRQ(ierr); 68 } 69 /* create B */ 70 if (!range) { 71 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 72 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 73 ierr = PetscArraycpy(data,U+nr*i,(nr-i)*nr);CHKERRQ(ierr); 74 } else { 75 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 76 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 77 ierr = PetscArraycpy(data,U,i*nr);CHKERRQ(ierr); 78 } 79 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 80 ierr = PetscFree(U);CHKERRQ(ierr); 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 const PetscScalar *vals; 122 PetscScalar v; 123 124 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 125 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 126 ierr = MatDenseGetArrayRead(GEd,&vals);CHKERRQ(ierr); 127 /* v = PetscAbsScalar(vals[0]) */; 128 v = 1.; 129 cvals[0] = vals[0]/v; 130 cvals[1] = vals[1]/v; 131 ierr = MatDenseRestoreArrayRead(GEd,&vals);CHKERRQ(ierr); 132 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 133 #if defined(PRINT_GDET) 134 { 135 PetscViewer viewer; 136 char filename[256]; 137 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 138 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 139 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 140 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 141 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 142 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 143 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 144 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 145 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 146 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 147 } 148 #endif 149 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 150 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 151 } 152 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode PCBDDCNedelecSupport(PC pc) 157 { 158 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 159 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 160 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 161 Vec tvec; 162 PetscSF sfv; 163 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 164 MPI_Comm comm; 165 IS lned,primals,allprimals,nedfieldlocal; 166 IS *eedges,*extrows,*extcols,*alleedges; 167 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 168 PetscScalar *vals,*work; 169 PetscReal *rwork; 170 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 171 PetscInt ne,nv,Lv,order,n,field; 172 PetscInt n_neigh,*neigh,*n_shared,**shared; 173 PetscInt i,j,extmem,cum,maxsize,nee; 174 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 175 PetscInt *sfvleaves,*sfvroots; 176 PetscInt *corners,*cedges; 177 PetscInt *ecount,**eneighs,*vcount,**vneighs; 178 PetscInt *emarks; 179 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 180 PetscErrorCode ierr; 181 182 PetscFunctionBegin; 183 /* If the discrete gradient is defined for a subset of dofs and global is true, 184 it assumes G is given in global ordering for all the dofs. 185 Otherwise, the ordering is global for the Nedelec field */ 186 order = pcbddc->nedorder; 187 conforming = pcbddc->conforming; 188 field = pcbddc->nedfield; 189 global = pcbddc->nedglobal; 190 setprimal = PETSC_FALSE; 191 print = PETSC_FALSE; 192 singular = PETSC_FALSE; 193 194 /* Command line customization */ 195 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 196 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 199 /* print debug info TODO: to be removed */ 200 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 201 ierr = PetscOptionsEnd();CHKERRQ(ierr); 202 203 /* Return if there are no edges in the decomposition and the problem is not singular */ 204 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 205 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 206 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 207 if (!singular) { 208 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 209 lrc[0] = PETSC_FALSE; 210 for (i=0;i<n;i++) { 211 if (PetscRealPart(vals[i]) > 2.) { 212 lrc[0] = PETSC_TRUE; 213 break; 214 } 215 } 216 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 217 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 218 if (!lrc[1]) PetscFunctionReturn(0); 219 } 220 221 /* Get Nedelec field */ 222 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 223 if (pcbddc->n_ISForDofsLocal && field >= 0) { 224 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 225 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 226 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 227 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 228 ne = n; 229 nedfieldlocal = NULL; 230 global = PETSC_TRUE; 231 } else if (field == PETSC_DECIDE) { 232 PetscInt rst,ren,*idx; 233 234 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 235 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 236 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 237 for (i=rst;i<ren;i++) { 238 PetscInt nc; 239 240 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 241 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 242 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 } 244 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 245 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 246 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 247 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 248 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 249 } else { 250 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 251 } 252 253 /* Sanity checks */ 254 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 255 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 256 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 257 258 /* Just set primal dofs and return */ 259 if (setprimal) { 260 IS enedfieldlocal; 261 PetscInt *eidxs; 262 263 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 264 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 265 if (nedfieldlocal) { 266 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 267 for (i=0,cum=0;i<ne;i++) { 268 if (PetscRealPart(vals[idxs[i]]) > 2.) { 269 eidxs[cum++] = idxs[i]; 270 } 271 } 272 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 273 } else { 274 for (i=0,cum=0;i<ne;i++) { 275 if (PetscRealPart(vals[i]) > 2.) { 276 eidxs[cum++] = i; 277 } 278 } 279 } 280 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 281 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 282 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 283 ierr = PetscFree(eidxs);CHKERRQ(ierr); 284 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 285 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 /* Compute some l2g maps */ 290 if (nedfieldlocal) { 291 IS is; 292 293 /* need to map from the local Nedelec field to local numbering */ 294 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 295 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 296 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 297 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 298 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 299 if (global) { 300 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 301 el2g = al2g; 302 } else { 303 IS gis; 304 305 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 306 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 307 ierr = ISDestroy(&gis);CHKERRQ(ierr); 308 } 309 ierr = ISDestroy(&is);CHKERRQ(ierr); 310 } else { 311 /* restore default */ 312 pcbddc->nedfield = -1; 313 /* one ref for the destruction of al2g, one for el2g */ 314 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 315 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 316 el2g = al2g; 317 fl2g = NULL; 318 } 319 320 /* Start communication to drop connections for interior edges (for cc analysis only) */ 321 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 322 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 323 if (nedfieldlocal) { 324 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 325 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 326 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 } else { 328 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 329 } 330 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 331 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 332 333 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 334 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 335 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 336 if (global) { 337 PetscInt rst; 338 339 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 340 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 341 if (matis->sf_rootdata[i] < 2) { 342 matis->sf_rootdata[cum++] = i + rst; 343 } 344 } 345 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 346 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 347 } else { 348 PetscInt *tbz; 349 350 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 351 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 352 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 353 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 354 for (i=0,cum=0;i<ne;i++) 355 if (matis->sf_leafdata[idxs[i]] == 1) 356 tbz[cum++] = i; 357 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 358 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 359 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 360 ierr = PetscFree(tbz);CHKERRQ(ierr); 361 } 362 } else { /* we need the entire G to infer the nullspace */ 363 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 364 G = pcbddc->discretegradient; 365 } 366 367 /* Extract subdomain relevant rows of G */ 368 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 369 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 370 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 371 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 372 ierr = ISDestroy(&lned);CHKERRQ(ierr); 373 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 374 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 375 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 376 377 /* SF for nodal dofs communications */ 378 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 379 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 380 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 382 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 384 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 386 i = singular ? 2 : 1; 387 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 388 389 /* Destroy temporary G created in MATIS format and modified G */ 390 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 391 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 392 ierr = MatDestroy(&G);CHKERRQ(ierr); 393 394 if (print) { 395 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 396 ierr = MatView(lG,NULL);CHKERRQ(ierr); 397 } 398 399 /* Save lG for values insertion in change of basis */ 400 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 401 402 /* Analyze the edge-nodes connections (duplicate lG) */ 403 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 404 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 405 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 409 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 410 /* need to import the boundary specification to ensure the 411 proper detection of coarse edges' endpoints */ 412 if (pcbddc->DirichletBoundariesLocal) { 413 IS is; 414 415 if (fl2g) { 416 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 417 } else { 418 is = pcbddc->DirichletBoundariesLocal; 419 } 420 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 421 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 422 for (i=0;i<cum;i++) { 423 if (idxs[i] >= 0) { 424 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 425 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 426 } 427 } 428 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 429 if (fl2g) { 430 ierr = ISDestroy(&is);CHKERRQ(ierr); 431 } 432 } 433 if (pcbddc->NeumannBoundariesLocal) { 434 IS is; 435 436 if (fl2g) { 437 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 438 } else { 439 is = pcbddc->NeumannBoundariesLocal; 440 } 441 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 442 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 443 for (i=0;i<cum;i++) { 444 if (idxs[i] >= 0) { 445 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 446 } 447 } 448 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 449 if (fl2g) { 450 ierr = ISDestroy(&is);CHKERRQ(ierr); 451 } 452 } 453 454 /* Count neighs per dof */ 455 ierr = ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 456 ierr = ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 457 458 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 459 for proper detection of coarse edges' endpoints */ 460 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 461 for (i=0;i<ne;i++) { 462 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 463 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 467 if (!conforming) { 468 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 469 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 470 } 471 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 472 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 473 cum = 0; 474 for (i=0;i<ne;i++) { 475 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 476 if (!PetscBTLookup(btee,i)) { 477 marks[cum++] = i; 478 continue; 479 } 480 /* set badly connected edge dofs as primal */ 481 if (!conforming) { 482 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 483 marks[cum++] = i; 484 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 485 for (j=ii[i];j<ii[i+1];j++) { 486 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 487 } 488 } else { 489 /* every edge dofs should be connected trough a certain number of nodal dofs 490 to other edge dofs belonging to coarse edges 491 - at most 2 endpoints 492 - order-1 interior nodal dofs 493 - no undefined nodal dofs (nconn < order) 494 */ 495 PetscInt ends = 0,ints = 0, undef = 0; 496 for (j=ii[i];j<ii[i+1];j++) { 497 PetscInt v = jj[j],k; 498 PetscInt nconn = iit[v+1]-iit[v]; 499 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 500 if (nconn > order) ends++; 501 else if (nconn == order) ints++; 502 else undef++; 503 } 504 if (undef || ends > 2 || ints != order -1) { 505 marks[cum++] = i; 506 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 507 for (j=ii[i];j<ii[i+1];j++) { 508 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 509 } 510 } 511 } 512 } 513 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 514 if (!order && ii[i+1] != ii[i]) { 515 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 516 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 517 } 518 } 519 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 520 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 521 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 if (!conforming) { 523 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 524 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 525 } 526 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 527 528 /* identify splitpoints and corner candidates */ 529 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 530 if (print) { 531 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 532 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 533 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 534 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 535 } 536 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 537 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 538 for (i=0;i<nv;i++) { 539 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 540 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 541 if (!order) { /* variable order */ 542 PetscReal vorder = 0.; 543 544 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 545 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 546 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 547 ord = 1; 548 } 549 if (PetscUnlikelyDebug(test%ord)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord); 550 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 551 if (PetscBTLookup(btbd,jj[j])) { 552 bdir = PETSC_TRUE; 553 break; 554 } 555 if (vc != ecount[jj[j]]) { 556 sneighs = PETSC_FALSE; 557 } else { 558 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 559 for (k=0;k<vc;k++) { 560 if (vn[k] != en[k]) { 561 sneighs = PETSC_FALSE; 562 break; 563 } 564 } 565 } 566 } 567 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 568 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 569 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 570 } else if (test == ord) { 571 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 572 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 573 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 574 } else { 575 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 576 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 577 } 578 } 579 } 580 ierr = ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 581 ierr = ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 582 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 583 584 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 585 if (order != 1) { 586 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 587 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 if (PetscBTLookup(btvcand,i)) { 590 PetscBool found = PETSC_FALSE; 591 for (j=ii[i];j<ii[i+1] && !found;j++) { 592 PetscInt k,e = jj[j]; 593 if (PetscBTLookup(bte,e)) continue; 594 for (k=iit[e];k<iit[e+1];k++) { 595 PetscInt v = jjt[k]; 596 if (v != i && PetscBTLookup(btvcand,v)) { 597 found = PETSC_TRUE; 598 break; 599 } 600 } 601 } 602 if (!found) { 603 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 604 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 605 } else { 606 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 607 } 608 } 609 } 610 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 611 } 612 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 613 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 614 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 615 616 /* Get the local G^T explicitly */ 617 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 618 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 619 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 620 621 /* Mark interior nodal dofs */ 622 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 623 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 624 for (i=1;i<n_neigh;i++) { 625 for (j=0;j<n_shared[i];j++) { 626 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 627 } 628 } 629 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 630 631 /* communicate corners and splitpoints */ 632 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 633 ierr = PetscArrayzero(sfvleaves,nv);CHKERRQ(ierr); 634 ierr = PetscArrayzero(sfvroots,Lv);CHKERRQ(ierr); 635 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 636 637 if (print) { 638 IS tbz; 639 640 cum = 0; 641 for (i=0;i<nv;i++) 642 if (sfvleaves[i]) 643 vmarks[cum++] = i; 644 645 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 646 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 647 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 648 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 649 } 650 651 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 652 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 653 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 654 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 655 656 /* Zero rows of lGt corresponding to identified corners 657 and interior nodal dofs */ 658 cum = 0; 659 for (i=0;i<nv;i++) { 660 if (sfvleaves[i]) { 661 vmarks[cum++] = i; 662 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 663 } 664 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 665 } 666 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 667 if (print) { 668 IS tbz; 669 670 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 671 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 672 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 673 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 674 } 675 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 676 ierr = PetscFree(vmarks);CHKERRQ(ierr); 677 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 678 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 679 680 /* Recompute G */ 681 ierr = MatDestroy(&lG);CHKERRQ(ierr); 682 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 683 if (print) { 684 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 685 ierr = MatView(lG,NULL);CHKERRQ(ierr); 686 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 687 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 688 } 689 690 /* Get primal dofs (if any) */ 691 cum = 0; 692 for (i=0;i<ne;i++) { 693 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 694 } 695 if (fl2g) { 696 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 697 } 698 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 699 if (print) { 700 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 701 ierr = ISView(primals,NULL);CHKERRQ(ierr); 702 } 703 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 704 /* TODO: what if the user passed in some of them ? */ 705 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 706 ierr = ISDestroy(&primals);CHKERRQ(ierr); 707 708 /* Compute edge connectivity */ 709 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 710 711 /* Symbolic conn = lG*lGt */ 712 ierr = MatProductCreate(lG,lGt,NULL,&conn);CHKERRQ(ierr); 713 ierr = MatProductSetType(conn,MATPRODUCT_AB);CHKERRQ(ierr); 714 ierr = MatProductSetAlgorithm(conn,"default");CHKERRQ(ierr); 715 ierr = MatProductSetFill(conn,PETSC_DEFAULT);CHKERRQ(ierr); 716 ierr = PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_");CHKERRQ(ierr); 717 ierr = MatProductSetFromOptions(conn);CHKERRQ(ierr); 718 ierr = MatProductSymbolic(conn);CHKERRQ(ierr); 719 720 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 721 if (fl2g) { 722 PetscBT btf; 723 PetscInt *iia,*jja,*iiu,*jju; 724 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 725 726 /* create CSR for all local dofs */ 727 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 728 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 729 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 730 iiu = pcbddc->mat_graph->xadj; 731 jju = pcbddc->mat_graph->adjncy; 732 } else if (pcbddc->use_local_adj) { 733 rest = PETSC_TRUE; 734 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 735 } else { 736 free = PETSC_TRUE; 737 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 738 iiu[0] = 0; 739 for (i=0;i<n;i++) { 740 iiu[i+1] = i+1; 741 jju[i] = -1; 742 } 743 } 744 745 /* import sizes of CSR */ 746 iia[0] = 0; 747 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 748 749 /* overwrite entries corresponding to the Nedelec field */ 750 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 751 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 752 for (i=0;i<ne;i++) { 753 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 754 iia[idxs[i]+1] = ii[i+1]-ii[i]; 755 } 756 757 /* iia in CSR */ 758 for (i=0;i<n;i++) iia[i+1] += iia[i]; 759 760 /* jja in CSR */ 761 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 762 for (i=0;i<n;i++) 763 if (!PetscBTLookup(btf,i)) 764 for (j=0;j<iiu[i+1]-iiu[i];j++) 765 jja[iia[i]+j] = jju[iiu[i]+j]; 766 767 /* map edge dofs connectivity */ 768 if (jj) { 769 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 770 for (i=0;i<ne;i++) { 771 PetscInt e = idxs[i]; 772 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 773 } 774 } 775 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 776 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 777 if (rest) { 778 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 779 } 780 if (free) { 781 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 782 } 783 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 784 } else { 785 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 786 } 787 788 /* Analyze interface for edge dofs */ 789 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 790 pcbddc->mat_graph->twodim = PETSC_FALSE; 791 792 /* Get coarse edges in the edge space */ 793 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 794 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 795 796 if (fl2g) { 797 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 798 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 799 for (i=0;i<nee;i++) { 800 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 801 } 802 } else { 803 eedges = alleedges; 804 primals = allprimals; 805 } 806 807 /* Mark fine edge dofs with their coarse edge id */ 808 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 809 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 810 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 811 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 812 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 813 if (print) { 814 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 815 ierr = ISView(primals,NULL);CHKERRQ(ierr); 816 } 817 818 maxsize = 0; 819 for (i=0;i<nee;i++) { 820 PetscInt size,mark = i+1; 821 822 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 823 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 824 for (j=0;j<size;j++) marks[idxs[j]] = mark; 825 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 826 maxsize = PetscMax(maxsize,size); 827 } 828 829 /* Find coarse edge endpoints */ 830 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 831 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 832 for (i=0;i<nee;i++) { 833 PetscInt mark = i+1,size; 834 835 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 836 if (!size && nedfieldlocal) continue; 837 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 838 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 839 if (print) { 840 ierr = PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);CHKERRQ(ierr); 841 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 842 } 843 for (j=0;j<size;j++) { 844 PetscInt k, ee = idxs[j]; 845 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 846 for (k=ii[ee];k<ii[ee+1];k++) { 847 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 848 if (PetscBTLookup(btv,jj[k])) { 849 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 850 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 851 PetscInt k2; 852 PetscBool corner = PETSC_FALSE; 853 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 854 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 855 /* it's a corner if either is connected with an edge dof belonging to a different cc or 856 if the edge dof lie on the natural part of the boundary */ 857 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 858 corner = PETSC_TRUE; 859 break; 860 } 861 } 862 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 863 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 864 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 865 } else { 866 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 867 } 868 } 869 } 870 } 871 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 872 } 873 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 874 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 875 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 876 877 /* Reset marked primal dofs */ 878 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 879 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 880 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 881 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 882 883 /* Now use the initial lG */ 884 ierr = MatDestroy(&lG);CHKERRQ(ierr); 885 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 886 lG = lGinit; 887 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 888 889 /* Compute extended cols indices */ 890 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 891 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 892 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 893 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 894 i *= maxsize; 895 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 896 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 897 eerr = PETSC_FALSE; 898 for (i=0;i<nee;i++) { 899 PetscInt size,found = 0; 900 901 cum = 0; 902 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 903 if (!size && nedfieldlocal) continue; 904 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 905 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 906 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 907 for (j=0;j<size;j++) { 908 PetscInt k,ee = idxs[j]; 909 for (k=ii[ee];k<ii[ee+1];k++) { 910 PetscInt vv = jj[k]; 911 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 912 else if (!PetscBTLookupSet(btvc,vv)) found++; 913 } 914 } 915 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 916 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 917 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 918 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 919 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 920 /* it may happen that endpoints are not defined at this point 921 if it is the case, mark this edge for a second pass */ 922 if (cum != size -1 || found != 2) { 923 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 924 if (print) { 925 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 926 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 927 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 928 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 929 } 930 eerr = PETSC_TRUE; 931 } 932 } 933 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 934 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 935 if (done) { 936 PetscInt *newprimals; 937 938 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 939 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 940 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 941 ierr = PetscArraycpy(newprimals,idxs,cum);CHKERRQ(ierr); 942 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 944 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 945 for (i=0;i<nee;i++) { 946 PetscBool has_candidates = PETSC_FALSE; 947 if (PetscBTLookup(bter,i)) { 948 PetscInt size,mark = i+1; 949 950 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 951 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 952 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 953 for (j=0;j<size;j++) { 954 PetscInt k,ee = idxs[j]; 955 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 956 for (k=ii[ee];k<ii[ee+1];k++) { 957 /* set all candidates located on the edge as corners */ 958 if (PetscBTLookup(btvcand,jj[k])) { 959 PetscInt k2,vv = jj[k]; 960 has_candidates = PETSC_TRUE; 961 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 962 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 963 /* set all edge dofs connected to candidate as primals */ 964 for (k2=iit[vv];k2<iit[vv+1];k2++) { 965 if (marks[jjt[k2]] == mark) { 966 PetscInt k3,ee2 = jjt[k2]; 967 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 968 newprimals[cum++] = ee2; 969 /* finally set the new corners */ 970 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 971 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 972 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 973 } 974 } 975 } 976 } else { 977 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 978 } 979 } 980 } 981 if (!has_candidates) { /* circular edge */ 982 PetscInt k, ee = idxs[0],*tmarks; 983 984 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 985 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 986 for (k=ii[ee];k<ii[ee+1];k++) { 987 PetscInt k2; 988 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 989 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 990 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 991 } 992 for (j=0;j<size;j++) { 993 if (tmarks[idxs[j]] > 1) { 994 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 995 newprimals[cum++] = idxs[j]; 996 } 997 } 998 ierr = PetscFree(tmarks);CHKERRQ(ierr); 999 } 1000 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1001 } 1002 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1003 } 1004 ierr = PetscFree(extcols);CHKERRQ(ierr); 1005 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1006 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1007 if (fl2g) { 1008 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1009 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1010 for (i=0;i<nee;i++) { 1011 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1012 } 1013 ierr = PetscFree(eedges);CHKERRQ(ierr); 1014 } 1015 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1016 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1017 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1018 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1019 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1020 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1021 pcbddc->mat_graph->twodim = PETSC_FALSE; 1022 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1023 if (fl2g) { 1024 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1025 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1026 for (i=0;i<nee;i++) { 1027 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1028 } 1029 } else { 1030 eedges = alleedges; 1031 primals = allprimals; 1032 } 1033 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1034 1035 /* Mark again */ 1036 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 1037 for (i=0;i<nee;i++) { 1038 PetscInt size,mark = i+1; 1039 1040 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1041 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1042 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1043 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1044 } 1045 if (print) { 1046 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1047 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1048 } 1049 1050 /* Recompute extended cols */ 1051 eerr = PETSC_FALSE; 1052 for (i=0;i<nee;i++) { 1053 PetscInt size; 1054 1055 cum = 0; 1056 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1057 if (!size && nedfieldlocal) continue; 1058 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1059 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1060 for (j=0;j<size;j++) { 1061 PetscInt k,ee = idxs[j]; 1062 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1063 } 1064 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1065 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1066 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1067 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1068 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1069 if (cum != size -1) { 1070 if (print) { 1071 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1072 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1073 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1074 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1075 } 1076 eerr = PETSC_TRUE; 1077 } 1078 } 1079 } 1080 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1081 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1082 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1083 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1084 /* an error should not occur at this point */ 1085 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1086 1087 /* Check the number of endpoints */ 1088 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1089 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1090 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1091 for (i=0;i<nee;i++) { 1092 PetscInt size, found = 0, gc[2]; 1093 1094 /* init with defaults */ 1095 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1096 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1097 if (!size && nedfieldlocal) continue; 1098 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1099 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1100 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1101 for (j=0;j<size;j++) { 1102 PetscInt k,ee = idxs[j]; 1103 for (k=ii[ee];k<ii[ee+1];k++) { 1104 PetscInt vv = jj[k]; 1105 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1106 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1107 corners[i*2+found++] = vv; 1108 } 1109 } 1110 } 1111 if (found != 2) { 1112 PetscInt e; 1113 if (fl2g) { 1114 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1115 } else { 1116 e = idxs[0]; 1117 } 1118 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1119 } 1120 1121 /* get primal dof index on this coarse edge */ 1122 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1123 if (gc[0] > gc[1]) { 1124 PetscInt swap = corners[2*i]; 1125 corners[2*i] = corners[2*i+1]; 1126 corners[2*i+1] = swap; 1127 } 1128 cedges[i] = idxs[size-1]; 1129 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1130 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1131 } 1132 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1134 1135 if (PetscDefined(USE_DEBUG)) { 1136 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1137 not interfere with neighbouring coarse edges */ 1138 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1139 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 for (i=0;i<nv;i++) { 1141 PetscInt emax = 0,eemax = 0; 1142 1143 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1144 ierr = PetscArrayzero(emarks,nee+1);CHKERRQ(ierr); 1145 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1146 for (j=1;j<nee+1;j++) { 1147 if (emax < emarks[j]) { 1148 emax = emarks[j]; 1149 eemax = j; 1150 } 1151 } 1152 /* not relevant for edges */ 1153 if (!eemax) continue; 1154 1155 for (j=ii[i];j<ii[i+1];j++) { 1156 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1157 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1158 } 1159 } 1160 } 1161 ierr = PetscFree(emarks);CHKERRQ(ierr); 1162 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1163 } 1164 1165 /* Compute extended rows indices for edge blocks of the change of basis */ 1166 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1167 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1168 extmem *= maxsize; 1169 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1170 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1171 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1172 for (i=0;i<nv;i++) { 1173 PetscInt mark = 0,size,start; 1174 1175 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1176 for (j=ii[i];j<ii[i+1];j++) 1177 if (marks[jj[j]] && !mark) 1178 mark = marks[jj[j]]; 1179 1180 /* not relevant */ 1181 if (!mark) continue; 1182 1183 /* import extended row */ 1184 mark--; 1185 start = mark*extmem+extrowcum[mark]; 1186 size = ii[i+1]-ii[i]; 1187 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1188 ierr = PetscArraycpy(extrow+start,jj+ii[i],size);CHKERRQ(ierr); 1189 extrowcum[mark] += size; 1190 } 1191 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1192 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1193 ierr = PetscFree(marks);CHKERRQ(ierr); 1194 1195 /* Compress extrows */ 1196 cum = 0; 1197 for (i=0;i<nee;i++) { 1198 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1199 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1200 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1201 cum = PetscMax(cum,size); 1202 } 1203 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1204 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1205 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1206 1207 /* Workspace for lapack inner calls and VecSetValues */ 1208 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1209 1210 /* Create change of basis matrix (preallocation can be improved) */ 1211 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1212 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1213 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1214 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1215 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1216 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1217 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1218 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1219 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1220 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1221 1222 /* Defaults to identity */ 1223 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1224 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1225 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1226 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1227 1228 /* Create discrete gradient for the coarser level if needed */ 1229 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1230 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1231 if (pcbddc->current_level < pcbddc->max_levels) { 1232 ISLocalToGlobalMapping cel2g,cvl2g; 1233 IS wis,gwis; 1234 PetscInt cnv,cne; 1235 1236 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1237 if (fl2g) { 1238 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1239 } else { 1240 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1241 pcbddc->nedclocal = wis; 1242 } 1243 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1244 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1245 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1246 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1247 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1248 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1249 1250 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1251 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1252 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1253 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1254 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1255 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1256 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1257 1258 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1259 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1260 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1261 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1262 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1263 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1264 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1265 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1266 } 1267 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1268 1269 #if defined(PRINT_GDET) 1270 inc = 0; 1271 lev = pcbddc->current_level; 1272 #endif 1273 1274 /* Insert values in the change of basis matrix */ 1275 for (i=0;i<nee;i++) { 1276 Mat Gins = NULL, GKins = NULL; 1277 IS cornersis = NULL; 1278 PetscScalar cvals[2]; 1279 1280 if (pcbddc->nedcG) { 1281 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1282 } 1283 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1284 if (Gins && GKins) { 1285 const PetscScalar *data; 1286 const PetscInt *rows,*cols; 1287 PetscInt nrh,nch,nrc,ncc; 1288 1289 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1290 /* H1 */ 1291 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1292 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1293 ierr = MatDenseGetArrayRead(Gins,&data);CHKERRQ(ierr); 1294 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1295 ierr = MatDenseRestoreArrayRead(Gins,&data);CHKERRQ(ierr); 1296 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1297 /* complement */ 1298 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1299 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1300 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1301 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1302 ierr = MatDenseGetArrayRead(GKins,&data);CHKERRQ(ierr); 1303 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1304 ierr = MatDenseRestoreArrayRead(GKins,&data);CHKERRQ(ierr); 1305 1306 /* coarse discrete gradient */ 1307 if (pcbddc->nedcG) { 1308 PetscInt cols[2]; 1309 1310 cols[0] = 2*i; 1311 cols[1] = 2*i+1; 1312 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1313 } 1314 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1315 } 1316 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1317 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1318 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1319 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1320 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1321 } 1322 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1323 1324 /* Start assembling */ 1325 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1326 if (pcbddc->nedcG) { 1327 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1328 } 1329 1330 /* Free */ 1331 if (fl2g) { 1332 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1333 for (i=0;i<nee;i++) { 1334 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1335 } 1336 ierr = PetscFree(eedges);CHKERRQ(ierr); 1337 } 1338 1339 /* hack mat_graph with primal dofs on the coarse edges */ 1340 { 1341 PCBDDCGraph graph = pcbddc->mat_graph; 1342 PetscInt *oqueue = graph->queue; 1343 PetscInt *ocptr = graph->cptr; 1344 PetscInt ncc,*idxs; 1345 1346 /* find first primal edge */ 1347 if (pcbddc->nedclocal) { 1348 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1349 } else { 1350 if (fl2g) { 1351 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1352 } 1353 idxs = cedges; 1354 } 1355 cum = 0; 1356 while (cum < nee && cedges[cum] < 0) cum++; 1357 1358 /* adapt connected components */ 1359 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1360 graph->cptr[0] = 0; 1361 for (i=0,ncc=0;i<graph->ncc;i++) { 1362 PetscInt lc = ocptr[i+1]-ocptr[i]; 1363 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1364 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1365 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1366 ncc++; 1367 lc--; 1368 cum++; 1369 while (cum < nee && cedges[cum] < 0) cum++; 1370 } 1371 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1372 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1373 ncc++; 1374 } 1375 graph->ncc = ncc; 1376 if (pcbddc->nedclocal) { 1377 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1378 } 1379 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1380 } 1381 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1382 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1384 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1385 1386 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1387 ierr = PetscFree(extrow);CHKERRQ(ierr); 1388 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1389 ierr = PetscFree(corners);CHKERRQ(ierr); 1390 ierr = PetscFree(cedges);CHKERRQ(ierr); 1391 ierr = PetscFree(extrows);CHKERRQ(ierr); 1392 ierr = PetscFree(extcols);CHKERRQ(ierr); 1393 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1394 1395 /* Complete assembling */ 1396 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1397 if (pcbddc->nedcG) { 1398 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1399 #if 0 1400 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1401 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1402 #endif 1403 } 1404 1405 /* set change of basis */ 1406 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1407 ierr = MatDestroy(&T);CHKERRQ(ierr); 1408 1409 PetscFunctionReturn(0); 1410 } 1411 1412 /* the near-null space of BDDC carries information on quadrature weights, 1413 and these can be collinear -> so cheat with MatNullSpaceCreate 1414 and create a suitable set of basis vectors first */ 1415 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1416 { 1417 PetscErrorCode ierr; 1418 PetscInt i; 1419 1420 PetscFunctionBegin; 1421 for (i=0;i<nvecs;i++) { 1422 PetscInt first,last; 1423 1424 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1425 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1426 if (i>=first && i < last) { 1427 PetscScalar *data; 1428 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1429 if (!has_const) { 1430 data[i-first] = 1.; 1431 } else { 1432 data[2*i-first] = 1./PetscSqrtReal(2.); 1433 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1434 } 1435 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1436 } 1437 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1438 } 1439 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1440 for (i=0;i<nvecs;i++) { /* reset vectors */ 1441 PetscInt first,last; 1442 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1443 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1444 if (i>=first && i < last) { 1445 PetscScalar *data; 1446 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1447 if (!has_const) { 1448 data[i-first] = 0.; 1449 } else { 1450 data[2*i-first] = 0.; 1451 data[2*i-first+1] = 0.; 1452 } 1453 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1454 } 1455 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1456 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1457 } 1458 PetscFunctionReturn(0); 1459 } 1460 1461 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1462 { 1463 Mat loc_divudotp; 1464 Vec p,v,vins,quad_vec,*quad_vecs; 1465 ISLocalToGlobalMapping map; 1466 PetscScalar *vals; 1467 const PetscScalar *array; 1468 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1469 PetscInt n_neigh,*neigh,*n_shared,**shared; 1470 PetscMPIInt rank; 1471 PetscErrorCode ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1475 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1476 ierr = MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1477 if (!maxneighs) { 1478 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1479 *nnsp = NULL; 1480 PetscFunctionReturn(0); 1481 } 1482 maxsize = 0; 1483 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1484 ierr = PetscMalloc2(maxsize,&gidxs,maxsize,&vals);CHKERRQ(ierr); 1485 /* create vectors to hold quadrature weights */ 1486 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1487 if (!transpose) { 1488 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1489 } else { 1490 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1491 } 1492 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1493 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1494 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1495 for (i=0;i<maxneighs;i++) { 1496 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1497 } 1498 1499 /* compute local quad vec */ 1500 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1501 if (!transpose) { 1502 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1503 } else { 1504 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1505 } 1506 ierr = VecSet(p,1.);CHKERRQ(ierr); 1507 if (!transpose) { 1508 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1509 } else { 1510 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1511 } 1512 if (vl2l) { 1513 Mat lA; 1514 VecScatter sc; 1515 1516 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1517 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1518 ierr = VecScatterCreate(v,NULL,vins,vl2l,&sc);CHKERRQ(ierr); 1519 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1522 } else { 1523 vins = v; 1524 } 1525 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1526 ierr = VecDestroy(&p);CHKERRQ(ierr); 1527 1528 /* insert in global quadrature vecs */ 1529 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 1530 for (i=1;i<n_neigh;i++) { 1531 const PetscInt *idxs; 1532 PetscInt idx,nn,j; 1533 1534 idxs = shared[i]; 1535 nn = n_shared[i]; 1536 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1537 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1538 idx = -(idx+1); 1539 if (idx < 0 || idx >= maxneighs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %D not in [0,%D)",idx,maxneighs); 1540 ierr = ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);CHKERRQ(ierr); 1541 ierr = VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1542 } 1543 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1544 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1545 if (vl2l) { 1546 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1547 } 1548 ierr = VecDestroy(&v);CHKERRQ(ierr); 1549 ierr = PetscFree2(gidxs,vals);CHKERRQ(ierr); 1550 1551 /* assemble near null space */ 1552 for (i=0;i<maxneighs;i++) { 1553 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1554 } 1555 for (i=0;i<maxneighs;i++) { 1556 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1557 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1558 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1559 } 1560 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1561 PetscFunctionReturn(0); 1562 } 1563 1564 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1565 { 1566 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1567 PetscErrorCode ierr; 1568 1569 PetscFunctionBegin; 1570 if (primalv) { 1571 if (pcbddc->user_primal_vertices_local) { 1572 IS list[2], newp; 1573 1574 list[0] = primalv; 1575 list[1] = pcbddc->user_primal_vertices_local; 1576 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1577 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1578 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1579 pcbddc->user_primal_vertices_local = newp; 1580 } else { 1581 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1582 } 1583 } 1584 PetscFunctionReturn(0); 1585 } 1586 1587 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1588 { 1589 PetscInt f, *comp = (PetscInt *)ctx; 1590 1591 PetscFunctionBegin; 1592 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1593 PetscFunctionReturn(0); 1594 } 1595 1596 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1597 { 1598 PetscErrorCode ierr; 1599 Vec local,global; 1600 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1601 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1602 PetscBool monolithic = PETSC_FALSE; 1603 1604 PetscFunctionBegin; 1605 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1606 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1607 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1608 /* need to convert from global to local topology information and remove references to information in global ordering */ 1609 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1610 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1611 ierr = VecBindToCPU(global,PETSC_TRUE);CHKERRQ(ierr); 1612 ierr = VecBindToCPU(local,PETSC_TRUE);CHKERRQ(ierr); 1613 if (monolithic) { /* just get block size to properly compute vertices */ 1614 if (pcbddc->vertex_size == 1) { 1615 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1616 } 1617 goto boundary; 1618 } 1619 1620 if (pcbddc->user_provided_isfordofs) { 1621 if (pcbddc->n_ISForDofs) { 1622 PetscInt i; 1623 1624 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1625 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1626 PetscInt bs; 1627 1628 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1629 ierr = ISGetBlockSize(pcbddc->ISForDofs[i],&bs);CHKERRQ(ierr); 1630 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1631 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1632 } 1633 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1634 pcbddc->n_ISForDofs = 0; 1635 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1636 } 1637 } else { 1638 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1639 DM dm; 1640 1641 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1642 if (!dm) { 1643 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1644 } 1645 if (dm) { 1646 IS *fields; 1647 PetscInt nf,i; 1648 1649 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1650 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1651 for (i=0;i<nf;i++) { 1652 PetscInt bs; 1653 1654 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1655 ierr = ISGetBlockSize(fields[i],&bs);CHKERRQ(ierr); 1656 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1657 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1658 } 1659 ierr = PetscFree(fields);CHKERRQ(ierr); 1660 pcbddc->n_ISForDofsLocal = nf; 1661 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1662 PetscContainer c; 1663 1664 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1665 if (c) { 1666 MatISLocalFields lf; 1667 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1668 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1669 } else { /* fallback, create the default fields if bs > 1 */ 1670 PetscInt i, n = matis->A->rmap->n; 1671 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1672 if (i > 1) { 1673 pcbddc->n_ISForDofsLocal = i; 1674 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1675 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1676 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1677 } 1678 } 1679 } 1680 } 1681 } else { 1682 PetscInt i; 1683 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1684 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1685 } 1686 } 1687 } 1688 1689 boundary: 1690 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1691 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1692 } else if (pcbddc->DirichletBoundariesLocal) { 1693 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1694 } 1695 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1696 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1697 } else if (pcbddc->NeumannBoundariesLocal) { 1698 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1699 } 1700 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1701 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1702 } 1703 ierr = VecDestroy(&global);CHKERRQ(ierr); 1704 ierr = VecDestroy(&local);CHKERRQ(ierr); 1705 /* detect local disconnected subdomains if requested (use matis->A) */ 1706 if (pcbddc->detect_disconnected) { 1707 IS primalv = NULL; 1708 PetscInt i; 1709 PetscBool filter = pcbddc->detect_disconnected_filter; 1710 1711 for (i=0;i<pcbddc->n_local_subs;i++) { 1712 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1713 } 1714 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1715 ierr = PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1716 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1717 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1718 } 1719 /* early stage corner detection */ 1720 { 1721 DM dm; 1722 1723 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1724 if (!dm) { 1725 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1726 } 1727 if (dm) { 1728 PetscBool isda; 1729 1730 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1731 if (isda) { 1732 ISLocalToGlobalMapping l2l; 1733 IS corners; 1734 Mat lA; 1735 PetscBool gl,lo; 1736 1737 { 1738 Vec cvec; 1739 const PetscScalar *coords; 1740 PetscInt dof,n,cdim; 1741 PetscBool memc = PETSC_TRUE; 1742 1743 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1744 ierr = DMGetCoordinates(dm,&cvec);CHKERRQ(ierr); 1745 ierr = VecGetLocalSize(cvec,&n);CHKERRQ(ierr); 1746 ierr = VecGetBlockSize(cvec,&cdim);CHKERRQ(ierr); 1747 n /= cdim; 1748 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 1749 ierr = PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);CHKERRQ(ierr); 1750 ierr = VecGetArrayRead(cvec,&coords);CHKERRQ(ierr); 1751 #if defined(PETSC_USE_COMPLEX) 1752 memc = PETSC_FALSE; 1753 #endif 1754 if (dof != 1) memc = PETSC_FALSE; 1755 if (memc) { 1756 ierr = PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);CHKERRQ(ierr); 1757 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1758 PetscReal *bcoords = pcbddc->mat_graph->coords; 1759 PetscInt i, b, d; 1760 1761 for (i=0;i<n;i++) { 1762 for (b=0;b<dof;b++) { 1763 for (d=0;d<cdim;d++) { 1764 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1765 } 1766 } 1767 } 1768 } 1769 ierr = VecRestoreArrayRead(cvec,&coords);CHKERRQ(ierr); 1770 pcbddc->mat_graph->cdim = cdim; 1771 pcbddc->mat_graph->cnloc = dof*n; 1772 pcbddc->mat_graph->cloc = PETSC_FALSE; 1773 } 1774 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1775 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1776 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1777 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1778 lo = (PetscBool)(l2l && corners); 1779 ierr = MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1780 if (gl) { /* From PETSc's DMDA */ 1781 const PetscInt *idx; 1782 PetscInt dof,bs,*idxout,n; 1783 1784 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1785 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1786 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1787 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1788 if (bs == dof) { 1789 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1790 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1791 } else { /* the original DMDA local-to-local map have been modified */ 1792 PetscInt i,d; 1793 1794 ierr = PetscMalloc1(dof*n,&idxout);CHKERRQ(ierr); 1795 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1796 ierr = ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);CHKERRQ(ierr); 1797 1798 bs = 1; 1799 n *= dof; 1800 } 1801 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1802 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1803 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1804 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1805 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1806 pcbddc->corner_selected = PETSC_TRUE; 1807 pcbddc->corner_selection = PETSC_TRUE; 1808 } 1809 if (corners) { 1810 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1811 } 1812 } 1813 } 1814 } 1815 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1816 DM dm; 1817 1818 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1819 if (!dm) { 1820 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1821 } 1822 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1823 Vec vcoords; 1824 PetscSection section; 1825 PetscReal *coords; 1826 PetscInt d,cdim,nl,nf,**ctxs; 1827 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1828 1829 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1830 ierr = DMGetLocalSection(dm,§ion);CHKERRQ(ierr); 1831 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1832 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1833 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1834 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1835 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1836 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1837 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1838 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1839 for (d=0;d<cdim;d++) { 1840 PetscInt i; 1841 const PetscScalar *v; 1842 1843 for (i=0;i<nf;i++) ctxs[i][0] = d; 1844 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1845 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1846 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1847 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1848 } 1849 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1850 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1851 ierr = PetscFree(coords);CHKERRQ(ierr); 1852 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1853 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1854 } 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1860 { 1861 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1862 PetscErrorCode ierr; 1863 IS nis; 1864 const PetscInt *idxs; 1865 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1866 PetscBool *ld; 1867 1868 PetscFunctionBegin; 1869 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1870 if (mop == MPI_LAND) { 1871 /* init rootdata with true */ 1872 ld = (PetscBool*) matis->sf_rootdata; 1873 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1874 } else { 1875 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 1876 } 1877 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 1878 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1879 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1880 ld = (PetscBool*) matis->sf_leafdata; 1881 for (i=0;i<nd;i++) 1882 if (-1 < idxs[i] && idxs[i] < n) 1883 ld[idxs[i]] = PETSC_TRUE; 1884 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1885 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1886 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1887 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1888 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1889 if (mop == MPI_LAND) { 1890 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1891 } else { 1892 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1893 } 1894 for (i=0,nnd=0;i<n;i++) 1895 if (ld[i]) 1896 nidxs[nnd++] = i; 1897 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1898 ierr = ISDestroy(is);CHKERRQ(ierr); 1899 *is = nis; 1900 PetscFunctionReturn(0); 1901 } 1902 1903 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1904 { 1905 PC_IS *pcis = (PC_IS*)(pc->data); 1906 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1907 PetscErrorCode ierr; 1908 1909 PetscFunctionBegin; 1910 if (!pcbddc->benign_have_null) { 1911 PetscFunctionReturn(0); 1912 } 1913 if (pcbddc->ChangeOfBasisMatrix) { 1914 Vec swap; 1915 1916 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1917 swap = pcbddc->work_change; 1918 pcbddc->work_change = r; 1919 r = swap; 1920 } 1921 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1922 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1923 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);CHKERRQ(ierr); 1924 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1925 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);CHKERRQ(ierr); 1926 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 1927 ierr = VecSet(z,0.);CHKERRQ(ierr); 1928 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1929 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1930 if (pcbddc->ChangeOfBasisMatrix) { 1931 pcbddc->work_change = r; 1932 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1933 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1934 } 1935 PetscFunctionReturn(0); 1936 } 1937 1938 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1939 { 1940 PCBDDCBenignMatMult_ctx ctx; 1941 PetscErrorCode ierr; 1942 PetscBool apply_right,apply_left,reset_x; 1943 1944 PetscFunctionBegin; 1945 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1946 if (transpose) { 1947 apply_right = ctx->apply_left; 1948 apply_left = ctx->apply_right; 1949 } else { 1950 apply_right = ctx->apply_right; 1951 apply_left = ctx->apply_left; 1952 } 1953 reset_x = PETSC_FALSE; 1954 if (apply_right) { 1955 const PetscScalar *ax; 1956 PetscInt nl,i; 1957 1958 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1959 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1960 ierr = PetscArraycpy(ctx->work,ax,nl);CHKERRQ(ierr); 1961 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1962 for (i=0;i<ctx->benign_n;i++) { 1963 PetscScalar sum,val; 1964 const PetscInt *idxs; 1965 PetscInt nz,j; 1966 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1967 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1968 sum = 0.; 1969 if (ctx->apply_p0) { 1970 val = ctx->work[idxs[nz-1]]; 1971 for (j=0;j<nz-1;j++) { 1972 sum += ctx->work[idxs[j]]; 1973 ctx->work[idxs[j]] += val; 1974 } 1975 } else { 1976 for (j=0;j<nz-1;j++) { 1977 sum += ctx->work[idxs[j]]; 1978 } 1979 } 1980 ctx->work[idxs[nz-1]] -= sum; 1981 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1982 } 1983 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1984 reset_x = PETSC_TRUE; 1985 } 1986 if (transpose) { 1987 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1988 } else { 1989 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1990 } 1991 if (reset_x) { 1992 ierr = VecResetArray(x);CHKERRQ(ierr); 1993 } 1994 if (apply_left) { 1995 PetscScalar *ay; 1996 PetscInt i; 1997 1998 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1999 for (i=0;i<ctx->benign_n;i++) { 2000 PetscScalar sum,val; 2001 const PetscInt *idxs; 2002 PetscInt nz,j; 2003 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2004 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2005 val = -ay[idxs[nz-1]]; 2006 if (ctx->apply_p0) { 2007 sum = 0.; 2008 for (j=0;j<nz-1;j++) { 2009 sum += ay[idxs[j]]; 2010 ay[idxs[j]] += val; 2011 } 2012 ay[idxs[nz-1]] += sum; 2013 } else { 2014 for (j=0;j<nz-1;j++) { 2015 ay[idxs[j]] += val; 2016 } 2017 ay[idxs[nz-1]] = 0.; 2018 } 2019 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2020 } 2021 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 2022 } 2023 PetscFunctionReturn(0); 2024 } 2025 2026 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2027 { 2028 PetscErrorCode ierr; 2029 2030 PetscFunctionBegin; 2031 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2036 { 2037 PetscErrorCode ierr; 2038 2039 PetscFunctionBegin; 2040 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2045 { 2046 PC_IS *pcis = (PC_IS*)pc->data; 2047 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2048 PCBDDCBenignMatMult_ctx ctx; 2049 PetscErrorCode ierr; 2050 2051 PetscFunctionBegin; 2052 if (!restore) { 2053 Mat A_IB,A_BI; 2054 PetscScalar *work; 2055 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2056 2057 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2058 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2059 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2060 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2061 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2062 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2063 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2064 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2065 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2066 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2067 ctx->apply_left = PETSC_TRUE; 2068 ctx->apply_right = PETSC_FALSE; 2069 ctx->apply_p0 = PETSC_FALSE; 2070 ctx->benign_n = pcbddc->benign_n; 2071 if (reuse) { 2072 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2073 ctx->free = PETSC_FALSE; 2074 } else { /* TODO: could be optimized for successive solves */ 2075 ISLocalToGlobalMapping N_to_D; 2076 PetscInt i; 2077 2078 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2079 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2080 for (i=0;i<pcbddc->benign_n;i++) { 2081 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2082 } 2083 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2084 ctx->free = PETSC_TRUE; 2085 } 2086 ctx->A = pcis->A_IB; 2087 ctx->work = work; 2088 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2089 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2090 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2091 pcis->A_IB = A_IB; 2092 2093 /* A_BI as A_IB^T */ 2094 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2095 pcbddc->benign_original_mat = pcis->A_BI; 2096 pcis->A_BI = A_BI; 2097 } else { 2098 if (!pcbddc->benign_original_mat) { 2099 PetscFunctionReturn(0); 2100 } 2101 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2102 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2103 pcis->A_IB = ctx->A; 2104 ctx->A = NULL; 2105 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2106 pcis->A_BI = pcbddc->benign_original_mat; 2107 pcbddc->benign_original_mat = NULL; 2108 if (ctx->free) { 2109 PetscInt i; 2110 for (i=0;i<ctx->benign_n;i++) { 2111 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2112 } 2113 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2114 } 2115 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2116 ierr = PetscFree(ctx);CHKERRQ(ierr); 2117 } 2118 PetscFunctionReturn(0); 2119 } 2120 2121 /* used just in bddc debug mode */ 2122 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2123 { 2124 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2125 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2126 Mat An; 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2131 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2132 if (is1) { 2133 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2134 ierr = MatDestroy(&An);CHKERRQ(ierr); 2135 } else { 2136 *B = An; 2137 } 2138 PetscFunctionReturn(0); 2139 } 2140 2141 /* TODO: add reuse flag */ 2142 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2143 { 2144 Mat Bt; 2145 PetscScalar *a,*bdata; 2146 const PetscInt *ii,*ij; 2147 PetscInt m,n,i,nnz,*bii,*bij; 2148 PetscBool flg_row; 2149 PetscErrorCode ierr; 2150 2151 PetscFunctionBegin; 2152 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2153 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2154 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2155 nnz = n; 2156 for (i=0;i<ii[n];i++) { 2157 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2158 } 2159 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2160 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2161 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2162 nnz = 0; 2163 bii[0] = 0; 2164 for (i=0;i<n;i++) { 2165 PetscInt j; 2166 for (j=ii[i];j<ii[i+1];j++) { 2167 PetscScalar entry = a[j]; 2168 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2169 bij[nnz] = ij[j]; 2170 bdata[nnz] = entry; 2171 nnz++; 2172 } 2173 } 2174 bii[i+1] = nnz; 2175 } 2176 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2177 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2178 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2179 { 2180 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2181 b->free_a = PETSC_TRUE; 2182 b->free_ij = PETSC_TRUE; 2183 } 2184 if (*B == A) { 2185 ierr = MatDestroy(&A);CHKERRQ(ierr); 2186 } 2187 *B = Bt; 2188 PetscFunctionReturn(0); 2189 } 2190 2191 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2192 { 2193 Mat B = NULL; 2194 DM dm; 2195 IS is_dummy,*cc_n; 2196 ISLocalToGlobalMapping l2gmap_dummy; 2197 PCBDDCGraph graph; 2198 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2199 PetscInt i,n; 2200 PetscInt *xadj,*adjncy; 2201 PetscBool isplex = PETSC_FALSE; 2202 PetscErrorCode ierr; 2203 2204 PetscFunctionBegin; 2205 if (ncc) *ncc = 0; 2206 if (cc) *cc = NULL; 2207 if (primalv) *primalv = NULL; 2208 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2209 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2210 if (!dm) { 2211 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2212 } 2213 if (dm) { 2214 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2215 } 2216 if (filter) isplex = PETSC_FALSE; 2217 2218 if (isplex) { /* this code has been modified from plexpartition.c */ 2219 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2220 PetscInt *adj = NULL; 2221 IS cellNumbering; 2222 const PetscInt *cellNum; 2223 PetscBool useCone, useClosure; 2224 PetscSection section; 2225 PetscSegBuffer adjBuffer; 2226 PetscSF sfPoint; 2227 PetscErrorCode ierr; 2228 2229 PetscFunctionBegin; 2230 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2231 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2232 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2233 /* Build adjacency graph via a section/segbuffer */ 2234 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2235 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2236 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2237 /* Always use FVM adjacency to create partitioner graph */ 2238 ierr = DMGetBasicAdjacency(dm, &useCone, &useClosure);CHKERRQ(ierr); 2239 ierr = DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);CHKERRQ(ierr); 2240 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2241 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2242 for (n = 0, p = pStart; p < pEnd; p++) { 2243 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2244 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2245 adjSize = PETSC_DETERMINE; 2246 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2247 for (a = 0; a < adjSize; ++a) { 2248 const PetscInt point = adj[a]; 2249 if (pStart <= point && point < pEnd) { 2250 PetscInt *PETSC_RESTRICT pBuf; 2251 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2252 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2253 *pBuf = point; 2254 } 2255 } 2256 n++; 2257 } 2258 ierr = DMSetBasicAdjacency(dm, useCone, useClosure);CHKERRQ(ierr); 2259 /* Derive CSR graph from section/segbuffer */ 2260 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2261 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2262 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2263 for (idx = 0, p = pStart; p < pEnd; p++) { 2264 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2265 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2266 } 2267 xadj[n] = size; 2268 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2269 /* Clean up */ 2270 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2271 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2272 ierr = PetscFree(adj);CHKERRQ(ierr); 2273 graph->xadj = xadj; 2274 graph->adjncy = adjncy; 2275 } else { 2276 Mat A; 2277 PetscBool isseqaij, flg_row; 2278 2279 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2280 if (!A->rmap->N || !A->cmap->N) { 2281 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2282 PetscFunctionReturn(0); 2283 } 2284 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2285 if (!isseqaij && filter) { 2286 PetscBool isseqdense; 2287 2288 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2289 if (!isseqdense) { 2290 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2291 } else { /* TODO: rectangular case and LDA */ 2292 PetscScalar *array; 2293 PetscReal chop=1.e-6; 2294 2295 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2296 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2297 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2298 for (i=0;i<n;i++) { 2299 PetscInt j; 2300 for (j=i+1;j<n;j++) { 2301 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2302 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2303 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2304 } 2305 } 2306 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2307 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2308 } 2309 } else { 2310 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2311 B = A; 2312 } 2313 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2314 2315 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2316 if (filter) { 2317 PetscScalar *data; 2318 PetscInt j,cum; 2319 2320 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2321 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2322 cum = 0; 2323 for (i=0;i<n;i++) { 2324 PetscInt t; 2325 2326 for (j=xadj[i];j<xadj[i+1];j++) { 2327 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2328 continue; 2329 } 2330 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2331 } 2332 t = xadj_filtered[i]; 2333 xadj_filtered[i] = cum; 2334 cum += t; 2335 } 2336 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2337 graph->xadj = xadj_filtered; 2338 graph->adjncy = adjncy_filtered; 2339 } else { 2340 graph->xadj = xadj; 2341 graph->adjncy = adjncy; 2342 } 2343 } 2344 /* compute local connected components using PCBDDCGraph */ 2345 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2346 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2347 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2348 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2349 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2350 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2351 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2352 2353 /* partial clean up */ 2354 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2355 if (B) { 2356 PetscBool flg_row; 2357 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2358 ierr = MatDestroy(&B);CHKERRQ(ierr); 2359 } 2360 if (isplex) { 2361 ierr = PetscFree(xadj);CHKERRQ(ierr); 2362 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2363 } 2364 2365 /* get back data */ 2366 if (isplex) { 2367 if (ncc) *ncc = graph->ncc; 2368 if (cc || primalv) { 2369 Mat A; 2370 PetscBT btv,btvt; 2371 PetscSection subSection; 2372 PetscInt *ids,cum,cump,*cids,*pids; 2373 2374 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2375 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2376 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2377 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2378 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2379 2380 cids[0] = 0; 2381 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2382 PetscInt j; 2383 2384 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2385 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2386 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2387 2388 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2389 for (k = 0; k < 2*size; k += 2) { 2390 PetscInt s, pp, p = closure[k], off, dof, cdof; 2391 2392 ierr = PetscSectionGetConstraintDof(subSection,p,&cdof);CHKERRQ(ierr); 2393 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2394 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2395 for (s = 0; s < dof-cdof; s++) { 2396 if (PetscBTLookupSet(btvt,off+s)) continue; 2397 if (!PetscBTLookup(btv,off+s)) { 2398 ids[cum++] = off+s; 2399 } else { /* cross-vertex */ 2400 pids[cump++] = off+s; 2401 } 2402 } 2403 ierr = DMPlexGetTreeParent(dm,p,&pp,NULL);CHKERRQ(ierr); 2404 if (pp != p) { 2405 ierr = PetscSectionGetConstraintDof(subSection,pp,&cdof);CHKERRQ(ierr); 2406 ierr = PetscSectionGetOffset(subSection,pp,&off);CHKERRQ(ierr); 2407 ierr = PetscSectionGetDof(subSection,pp,&dof);CHKERRQ(ierr); 2408 for (s = 0; s < dof-cdof; s++) { 2409 if (PetscBTLookupSet(btvt,off+s)) continue; 2410 if (!PetscBTLookup(btv,off+s)) { 2411 ids[cum++] = off+s; 2412 } else { /* cross-vertex */ 2413 pids[cump++] = off+s; 2414 } 2415 } 2416 } 2417 } 2418 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2419 } 2420 cids[i+1] = cum; 2421 /* mark dofs as already assigned */ 2422 for (j = cids[i]; j < cids[i+1]; j++) { 2423 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2424 } 2425 } 2426 if (cc) { 2427 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2428 for (i = 0; i < graph->ncc; i++) { 2429 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2430 } 2431 *cc = cc_n; 2432 } 2433 if (primalv) { 2434 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2435 } 2436 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2437 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2438 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2439 } 2440 } else { 2441 if (ncc) *ncc = graph->ncc; 2442 if (cc) { 2443 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2444 for (i=0;i<graph->ncc;i++) { 2445 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2446 } 2447 *cc = cc_n; 2448 } 2449 } 2450 /* clean up graph */ 2451 graph->xadj = NULL; 2452 graph->adjncy = NULL; 2453 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2458 { 2459 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2460 PC_IS* pcis = (PC_IS*)(pc->data); 2461 IS dirIS = NULL; 2462 PetscInt i; 2463 PetscErrorCode ierr; 2464 2465 PetscFunctionBegin; 2466 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2467 if (zerodiag) { 2468 Mat A; 2469 Vec vec3_N; 2470 PetscScalar *vals; 2471 const PetscInt *idxs; 2472 PetscInt nz,*count; 2473 2474 /* p0 */ 2475 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2476 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2477 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2478 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2479 for (i=0;i<nz;i++) vals[i] = 1.; 2480 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2481 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2482 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2483 /* v_I */ 2484 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2485 for (i=0;i<nz;i++) vals[i] = 0.; 2486 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2487 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2488 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2489 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2490 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2491 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2492 if (dirIS) { 2493 PetscInt n; 2494 2495 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2496 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2497 for (i=0;i<n;i++) vals[i] = 0.; 2498 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2499 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2500 } 2501 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2502 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2503 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2504 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2505 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2506 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2507 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2508 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2509 ierr = PetscFree(vals);CHKERRQ(ierr); 2510 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2511 2512 /* there should not be any pressure dofs lying on the interface */ 2513 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2514 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2515 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2516 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2517 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2518 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2519 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2520 ierr = PetscFree(count);CHKERRQ(ierr); 2521 } 2522 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2523 2524 /* check PCBDDCBenignGetOrSetP0 */ 2525 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2526 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2527 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2528 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2529 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2530 for (i=0;i<pcbddc->benign_n;i++) { 2531 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2532 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2533 } 2534 PetscFunctionReturn(0); 2535 } 2536 2537 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2538 { 2539 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2540 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2541 PetscInt nz,n,benign_n,bsp = 1; 2542 PetscInt *interior_dofs,n_interior_dofs,nneu; 2543 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2544 PetscErrorCode ierr; 2545 2546 PetscFunctionBegin; 2547 if (reuse) goto project_b0; 2548 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2549 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2550 for (n=0;n<pcbddc->benign_n;n++) { 2551 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2552 } 2553 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2554 has_null_pressures = PETSC_TRUE; 2555 have_null = PETSC_TRUE; 2556 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2557 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2558 Checks if all the pressure dofs in each subdomain have a zero diagonal 2559 If not, a change of basis on pressures is not needed 2560 since the local Schur complements are already SPD 2561 */ 2562 if (pcbddc->n_ISForDofsLocal) { 2563 IS iP = NULL; 2564 PetscInt p,*pp; 2565 PetscBool flg; 2566 2567 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);CHKERRQ(ierr); 2568 n = pcbddc->n_ISForDofsLocal; 2569 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2570 ierr = PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);CHKERRQ(ierr); 2571 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2572 if (!flg) { 2573 n = 1; 2574 pp[0] = pcbddc->n_ISForDofsLocal-1; 2575 } 2576 2577 bsp = 0; 2578 for (p=0;p<n;p++) { 2579 PetscInt bs; 2580 2581 if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]); 2582 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2583 bsp += bs; 2584 } 2585 ierr = PetscMalloc1(bsp,&bzerodiag);CHKERRQ(ierr); 2586 bsp = 0; 2587 for (p=0;p<n;p++) { 2588 const PetscInt *idxs; 2589 PetscInt b,bs,npl,*bidxs; 2590 2591 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2592 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);CHKERRQ(ierr); 2593 ierr = ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2594 ierr = PetscMalloc1(npl/bs,&bidxs);CHKERRQ(ierr); 2595 for (b=0;b<bs;b++) { 2596 PetscInt i; 2597 2598 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2599 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);CHKERRQ(ierr); 2600 bsp++; 2601 } 2602 ierr = PetscFree(bidxs);CHKERRQ(ierr); 2603 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2604 } 2605 ierr = ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);CHKERRQ(ierr); 2606 2607 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2608 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2609 if (iP) { 2610 IS newpressures; 2611 2612 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2613 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2614 pressures = newpressures; 2615 } 2616 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2617 if (!sorted) { 2618 ierr = ISSort(pressures);CHKERRQ(ierr); 2619 } 2620 ierr = PetscFree(pp);CHKERRQ(ierr); 2621 } 2622 2623 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2624 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2625 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2626 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2627 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2628 if (!sorted) { 2629 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2630 } 2631 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2632 zerodiag_save = zerodiag; 2633 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2634 if (!nz) { 2635 if (n) have_null = PETSC_FALSE; 2636 has_null_pressures = PETSC_FALSE; 2637 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2638 } 2639 recompute_zerodiag = PETSC_FALSE; 2640 2641 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2642 zerodiag_subs = NULL; 2643 benign_n = 0; 2644 n_interior_dofs = 0; 2645 interior_dofs = NULL; 2646 nneu = 0; 2647 if (pcbddc->NeumannBoundariesLocal) { 2648 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2649 } 2650 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2651 if (checkb) { /* need to compute interior nodes */ 2652 PetscInt n,i,j; 2653 PetscInt n_neigh,*neigh,*n_shared,**shared; 2654 PetscInt *iwork; 2655 2656 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2657 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2658 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2659 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2660 for (i=1;i<n_neigh;i++) 2661 for (j=0;j<n_shared[i];j++) 2662 iwork[shared[i][j]] += 1; 2663 for (i=0;i<n;i++) 2664 if (!iwork[i]) 2665 interior_dofs[n_interior_dofs++] = i; 2666 ierr = PetscFree(iwork);CHKERRQ(ierr); 2667 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2668 } 2669 if (has_null_pressures) { 2670 IS *subs; 2671 PetscInt nsubs,i,j,nl; 2672 const PetscInt *idxs; 2673 PetscScalar *array; 2674 Vec *work; 2675 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2676 2677 subs = pcbddc->local_subs; 2678 nsubs = pcbddc->n_local_subs; 2679 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2680 if (checkb) { 2681 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2682 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2683 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2684 /* work[0] = 1_p */ 2685 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2686 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2687 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2688 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2689 /* work[0] = 1_v */ 2690 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2691 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2692 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2693 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2694 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2695 } 2696 2697 if (nsubs > 1 || bsp > 1) { 2698 IS *is; 2699 PetscInt b,totb; 2700 2701 totb = bsp; 2702 is = bsp > 1 ? bzerodiag : &zerodiag; 2703 nsubs = PetscMax(nsubs,1); 2704 ierr = PetscCalloc1(nsubs*totb,&zerodiag_subs);CHKERRQ(ierr); 2705 for (b=0;b<totb;b++) { 2706 for (i=0;i<nsubs;i++) { 2707 ISLocalToGlobalMapping l2g; 2708 IS t_zerodiag_subs; 2709 PetscInt nl; 2710 2711 if (subs) { 2712 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2713 } else { 2714 IS tis; 2715 2716 ierr = MatGetLocalSize(pcbddc->local_mat,&nl,NULL);CHKERRQ(ierr); 2717 ierr = ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);CHKERRQ(ierr); 2718 ierr = ISLocalToGlobalMappingCreateIS(tis,&l2g);CHKERRQ(ierr); 2719 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2720 } 2721 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);CHKERRQ(ierr); 2722 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2723 if (nl) { 2724 PetscBool valid = PETSC_TRUE; 2725 2726 if (checkb) { 2727 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2728 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2729 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2730 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2731 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2732 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2733 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2734 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2735 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2736 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2737 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2738 for (j=0;j<n_interior_dofs;j++) { 2739 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2740 valid = PETSC_FALSE; 2741 break; 2742 } 2743 } 2744 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2745 } 2746 if (valid && nneu) { 2747 const PetscInt *idxs; 2748 PetscInt nzb; 2749 2750 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2751 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2752 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2753 if (nzb) valid = PETSC_FALSE; 2754 } 2755 if (valid && pressures) { 2756 IS t_pressure_subs,tmp; 2757 PetscInt i1,i2; 2758 2759 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2760 ierr = ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);CHKERRQ(ierr); 2761 ierr = ISGetLocalSize(tmp,&i1);CHKERRQ(ierr); 2762 ierr = ISGetLocalSize(t_zerodiag_subs,&i2);CHKERRQ(ierr); 2763 if (i2 != i1) valid = PETSC_FALSE; 2764 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2765 ierr = ISDestroy(&tmp);CHKERRQ(ierr); 2766 } 2767 if (valid) { 2768 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);CHKERRQ(ierr); 2769 benign_n++; 2770 } else recompute_zerodiag = PETSC_TRUE; 2771 } 2772 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2773 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2774 } 2775 } 2776 } else { /* there's just one subdomain (or zero if they have not been detected */ 2777 PetscBool valid = PETSC_TRUE; 2778 2779 if (nneu) valid = PETSC_FALSE; 2780 if (valid && pressures) { 2781 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2782 } 2783 if (valid && checkb) { 2784 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2785 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2786 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2787 for (j=0;j<n_interior_dofs;j++) { 2788 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2789 valid = PETSC_FALSE; 2790 break; 2791 } 2792 } 2793 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2794 } 2795 if (valid) { 2796 benign_n = 1; 2797 ierr = PetscMalloc1(benign_n,&zerodiag_subs);CHKERRQ(ierr); 2798 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2799 zerodiag_subs[0] = zerodiag; 2800 } 2801 } 2802 if (checkb) { 2803 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2804 } 2805 } 2806 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2807 2808 if (!benign_n) { 2809 PetscInt n; 2810 2811 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2812 recompute_zerodiag = PETSC_FALSE; 2813 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2814 if (n) have_null = PETSC_FALSE; 2815 } 2816 2817 /* final check for null pressures */ 2818 if (zerodiag && pressures) { 2819 ierr = ISEqual(pressures,zerodiag,&have_null);CHKERRQ(ierr); 2820 } 2821 2822 if (recompute_zerodiag) { 2823 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2824 if (benign_n == 1) { 2825 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2826 zerodiag = zerodiag_subs[0]; 2827 } else { 2828 PetscInt i,nzn,*new_idxs; 2829 2830 nzn = 0; 2831 for (i=0;i<benign_n;i++) { 2832 PetscInt ns; 2833 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2834 nzn += ns; 2835 } 2836 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2837 nzn = 0; 2838 for (i=0;i<benign_n;i++) { 2839 PetscInt ns,*idxs; 2840 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2841 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2842 ierr = PetscArraycpy(new_idxs+nzn,idxs,ns);CHKERRQ(ierr); 2843 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2844 nzn += ns; 2845 } 2846 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2847 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2848 } 2849 have_null = PETSC_FALSE; 2850 } 2851 2852 /* determines if the coarse solver will be singular or not */ 2853 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2854 2855 /* Prepare matrix to compute no-net-flux */ 2856 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2857 Mat A,loc_divudotp; 2858 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2859 IS row,col,isused = NULL; 2860 PetscInt M,N,n,st,n_isused; 2861 2862 if (pressures) { 2863 isused = pressures; 2864 } else { 2865 isused = zerodiag_save; 2866 } 2867 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2868 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2869 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2870 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2871 n_isused = 0; 2872 if (isused) { 2873 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2874 } 2875 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 2876 st = st-n_isused; 2877 if (n) { 2878 const PetscInt *gidxs; 2879 2880 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2881 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2882 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2883 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2884 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2885 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2886 } else { 2887 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2888 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2889 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2890 } 2891 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2892 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2893 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2894 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2895 ierr = ISDestroy(&row);CHKERRQ(ierr); 2896 ierr = ISDestroy(&col);CHKERRQ(ierr); 2897 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2898 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2899 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2900 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2901 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2902 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2903 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2904 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2905 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2906 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2907 } 2908 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2909 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2910 if (bzerodiag) { 2911 PetscInt i; 2912 2913 for (i=0;i<bsp;i++) { 2914 ierr = ISDestroy(&bzerodiag[i]);CHKERRQ(ierr); 2915 } 2916 ierr = PetscFree(bzerodiag);CHKERRQ(ierr); 2917 } 2918 pcbddc->benign_n = benign_n; 2919 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2920 2921 /* determines if the problem has subdomains with 0 pressure block */ 2922 have_null = (PetscBool)(!!pcbddc->benign_n); 2923 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2924 2925 project_b0: 2926 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2927 /* change of basis and p0 dofs */ 2928 if (pcbddc->benign_n) { 2929 PetscInt i,s,*nnz; 2930 2931 /* local change of basis for pressures */ 2932 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2933 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2934 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2935 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2936 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2937 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2938 for (i=0;i<pcbddc->benign_n;i++) { 2939 const PetscInt *idxs; 2940 PetscInt nzs,j; 2941 2942 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2943 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2944 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2945 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2946 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2947 } 2948 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2949 ierr = MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2950 ierr = PetscFree(nnz);CHKERRQ(ierr); 2951 /* set identity by default */ 2952 for (i=0;i<n;i++) { 2953 ierr = MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);CHKERRQ(ierr); 2954 } 2955 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2956 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2957 /* set change on pressures */ 2958 for (s=0;s<pcbddc->benign_n;s++) { 2959 PetscScalar *array; 2960 const PetscInt *idxs; 2961 PetscInt nzs; 2962 2963 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2964 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2965 for (i=0;i<nzs-1;i++) { 2966 PetscScalar vals[2]; 2967 PetscInt cols[2]; 2968 2969 cols[0] = idxs[i]; 2970 cols[1] = idxs[nzs-1]; 2971 vals[0] = 1.; 2972 vals[1] = 1.; 2973 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2974 } 2975 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2976 for (i=0;i<nzs-1;i++) array[i] = -1.; 2977 array[nzs-1] = 1.; 2978 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2979 /* store local idxs for p0 */ 2980 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2981 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2982 ierr = PetscFree(array);CHKERRQ(ierr); 2983 } 2984 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2985 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2986 2987 /* project if needed */ 2988 if (pcbddc->benign_change_explicit) { 2989 Mat M; 2990 2991 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2992 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2993 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2994 ierr = MatDestroy(&M);CHKERRQ(ierr); 2995 } 2996 /* store global idxs for p0 */ 2997 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2998 } 2999 *zerodiaglocal = zerodiag; 3000 PetscFunctionReturn(0); 3001 } 3002 3003 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3004 { 3005 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3006 PetscScalar *array; 3007 PetscErrorCode ierr; 3008 3009 PetscFunctionBegin; 3010 if (!pcbddc->benign_sf) { 3011 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 3012 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 3013 } 3014 if (get) { 3015 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3016 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 3017 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 3018 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3019 } else { 3020 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 3021 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 3022 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 3023 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 3024 } 3025 PetscFunctionReturn(0); 3026 } 3027 3028 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3029 { 3030 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3031 PetscErrorCode ierr; 3032 3033 PetscFunctionBegin; 3034 /* TODO: add error checking 3035 - avoid nested pop (or push) calls. 3036 - cannot push before pop. 3037 - cannot call this if pcbddc->local_mat is NULL 3038 */ 3039 if (!pcbddc->benign_n) { 3040 PetscFunctionReturn(0); 3041 } 3042 if (pop) { 3043 if (pcbddc->benign_change_explicit) { 3044 IS is_p0; 3045 MatReuse reuse; 3046 3047 /* extract B_0 */ 3048 reuse = MAT_INITIAL_MATRIX; 3049 if (pcbddc->benign_B0) { 3050 reuse = MAT_REUSE_MATRIX; 3051 } 3052 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 3053 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 3054 /* remove rows and cols from local problem */ 3055 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 3056 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3057 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 3058 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3059 } else { 3060 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3061 PetscScalar *vals; 3062 PetscInt i,n,*idxs_ins; 3063 3064 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 3065 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 3066 if (!pcbddc->benign_B0) { 3067 PetscInt *nnz; 3068 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 3069 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 3070 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3071 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 3072 for (i=0;i<pcbddc->benign_n;i++) { 3073 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 3074 nnz[i] = n - nnz[i]; 3075 } 3076 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 3077 ierr = MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3078 ierr = PetscFree(nnz);CHKERRQ(ierr); 3079 } 3080 3081 for (i=0;i<pcbddc->benign_n;i++) { 3082 PetscScalar *array; 3083 PetscInt *idxs,j,nz,cum; 3084 3085 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 3086 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3087 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3088 for (j=0;j<nz;j++) vals[j] = 1.; 3089 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3090 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 3091 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 3092 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 3093 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 3094 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3095 cum = 0; 3096 for (j=0;j<n;j++) { 3097 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3098 vals[cum] = array[j]; 3099 idxs_ins[cum] = j; 3100 cum++; 3101 } 3102 } 3103 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3104 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3105 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3106 } 3107 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3108 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3109 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3110 } 3111 } else { /* push */ 3112 if (pcbddc->benign_change_explicit) { 3113 PetscInt i; 3114 3115 for (i=0;i<pcbddc->benign_n;i++) { 3116 PetscScalar *B0_vals; 3117 PetscInt *B0_cols,B0_ncol; 3118 3119 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3120 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3121 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3122 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3123 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3124 } 3125 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3126 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3127 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3128 } 3129 PetscFunctionReturn(0); 3130 } 3131 3132 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3133 { 3134 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3135 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3136 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3137 PetscBLASInt *B_iwork,*B_ifail; 3138 PetscScalar *work,lwork; 3139 PetscScalar *St,*S,*eigv; 3140 PetscScalar *Sarray,*Starray; 3141 PetscReal *eigs,thresh,lthresh,uthresh; 3142 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3143 PetscBool allocated_S_St; 3144 #if defined(PETSC_USE_COMPLEX) 3145 PetscReal *rwork; 3146 #endif 3147 PetscErrorCode ierr; 3148 3149 PetscFunctionBegin; 3150 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3151 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3152 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3153 ierr = PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3154 3155 if (pcbddc->dbg_flag) { 3156 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3157 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3158 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3159 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3160 } 3161 3162 if (pcbddc->dbg_flag) { 3163 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);CHKERRQ(ierr); 3164 } 3165 3166 /* max size of subsets */ 3167 mss = 0; 3168 for (i=0;i<sub_schurs->n_subs;i++) { 3169 PetscInt subset_size; 3170 3171 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3172 mss = PetscMax(mss,subset_size); 3173 } 3174 3175 /* min/max and threshold */ 3176 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3177 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3178 nmax = PetscMax(nmin,nmax); 3179 allocated_S_St = PETSC_FALSE; 3180 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3181 allocated_S_St = PETSC_TRUE; 3182 } 3183 3184 /* allocate lapack workspace */ 3185 cum = cum2 = 0; 3186 maxneigs = 0; 3187 for (i=0;i<sub_schurs->n_subs;i++) { 3188 PetscInt n,subset_size; 3189 3190 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3191 n = PetscMin(subset_size,nmax); 3192 cum += subset_size; 3193 cum2 += subset_size*n; 3194 maxneigs = PetscMax(maxneigs,n); 3195 } 3196 lwork = 0; 3197 if (mss) { 3198 if (sub_schurs->is_symmetric) { 3199 PetscScalar sdummy = 0.; 3200 PetscBLASInt B_itype = 1; 3201 PetscBLASInt B_N = mss, idummy = 0; 3202 PetscReal rdummy = 0.,zero = 0.0; 3203 PetscReal eps = 0.0; /* dlamch? */ 3204 3205 B_lwork = -1; 3206 /* some implementations may complain about NULL pointers, even if we are querying */ 3207 S = &sdummy; 3208 St = &sdummy; 3209 eigs = &rdummy; 3210 eigv = &sdummy; 3211 B_iwork = &idummy; 3212 B_ifail = &idummy; 3213 #if defined(PETSC_USE_COMPLEX) 3214 rwork = &rdummy; 3215 #endif 3216 thresh = 1.0; 3217 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3218 #if defined(PETSC_USE_COMPLEX) 3219 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3220 #else 3221 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3222 #endif 3223 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3224 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3225 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3226 } 3227 3228 nv = 0; 3229 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3230 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3231 } 3232 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3233 if (allocated_S_St) { 3234 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3235 } 3236 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3237 #if defined(PETSC_USE_COMPLEX) 3238 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3239 #endif 3240 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3241 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3242 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3243 nv+cum,&pcbddc->adaptive_constraints_idxs, 3244 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3245 ierr = PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);CHKERRQ(ierr); 3246 3247 maxneigs = 0; 3248 cum = cumarray = 0; 3249 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3250 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3251 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3252 const PetscInt *idxs; 3253 3254 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3255 for (cum=0;cum<nv;cum++) { 3256 pcbddc->adaptive_constraints_n[cum] = 1; 3257 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3258 pcbddc->adaptive_constraints_data[cum] = 1.0; 3259 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3260 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3261 } 3262 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3263 } 3264 3265 if (mss) { /* multilevel */ 3266 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3267 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3268 } 3269 3270 lthresh = pcbddc->adaptive_threshold[0]; 3271 uthresh = pcbddc->adaptive_threshold[1]; 3272 for (i=0;i<sub_schurs->n_subs;i++) { 3273 const PetscInt *idxs; 3274 PetscReal upper,lower; 3275 PetscInt j,subset_size,eigs_start = 0; 3276 PetscBLASInt B_N; 3277 PetscBool same_data = PETSC_FALSE; 3278 PetscBool scal = PETSC_FALSE; 3279 3280 if (pcbddc->use_deluxe_scaling) { 3281 upper = PETSC_MAX_REAL; 3282 lower = uthresh; 3283 } else { 3284 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3285 upper = 1./uthresh; 3286 lower = 0.; 3287 } 3288 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3289 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3290 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3291 /* this is experimental: we assume the dofs have been properly grouped to have 3292 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3293 if (!sub_schurs->is_posdef) { 3294 Mat T; 3295 3296 for (j=0;j<subset_size;j++) { 3297 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3298 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3299 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3300 ierr = MatDestroy(&T);CHKERRQ(ierr); 3301 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3302 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3303 ierr = MatDestroy(&T);CHKERRQ(ierr); 3304 if (sub_schurs->change_primal_sub) { 3305 PetscInt nz,k; 3306 const PetscInt *idxs; 3307 3308 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3309 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3310 for (k=0;k<nz;k++) { 3311 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3312 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3313 } 3314 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3315 } 3316 scal = PETSC_TRUE; 3317 break; 3318 } 3319 } 3320 } 3321 3322 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3323 if (sub_schurs->is_symmetric) { 3324 PetscInt j,k; 3325 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3326 ierr = PetscArrayzero(S,subset_size*subset_size);CHKERRQ(ierr); 3327 ierr = PetscArrayzero(St,subset_size*subset_size);CHKERRQ(ierr); 3328 } 3329 for (j=0;j<subset_size;j++) { 3330 for (k=j;k<subset_size;k++) { 3331 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3332 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3333 } 3334 } 3335 } else { 3336 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3337 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3338 } 3339 } else { 3340 S = Sarray + cumarray; 3341 St = Starray + cumarray; 3342 } 3343 /* see if we can save some work */ 3344 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3345 ierr = PetscArraycmp(S,St,subset_size*subset_size,&same_data);CHKERRQ(ierr); 3346 } 3347 3348 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3349 B_neigs = 0; 3350 } else { 3351 if (sub_schurs->is_symmetric) { 3352 PetscBLASInt B_itype = 1; 3353 PetscBLASInt B_IL, B_IU; 3354 PetscReal eps = -1.0; /* dlamch? */ 3355 PetscInt nmin_s; 3356 PetscBool compute_range; 3357 3358 B_neigs = 0; 3359 compute_range = (PetscBool)!same_data; 3360 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3361 3362 if (pcbddc->dbg_flag) { 3363 PetscInt nc = 0; 3364 3365 if (sub_schurs->change_primal_sub) { 3366 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3367 } 3368 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3369 } 3370 3371 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3372 if (compute_range) { 3373 3374 /* ask for eigenvalues larger than thresh */ 3375 if (sub_schurs->is_posdef) { 3376 #if defined(PETSC_USE_COMPLEX) 3377 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3378 #else 3379 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3380 #endif 3381 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3382 } else { /* no theory so far, but it works nicely */ 3383 PetscInt recipe = 0,recipe_m = 1; 3384 PetscReal bb[2]; 3385 3386 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3387 switch (recipe) { 3388 case 0: 3389 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3390 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3391 #if defined(PETSC_USE_COMPLEX) 3392 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3393 #else 3394 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3395 #endif 3396 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3397 break; 3398 case 1: 3399 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3400 #if defined(PETSC_USE_COMPLEX) 3401 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3402 #else 3403 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3404 #endif 3405 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3406 if (!scal) { 3407 PetscBLASInt B_neigs2 = 0; 3408 3409 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3410 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3411 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3412 #if defined(PETSC_USE_COMPLEX) 3413 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3414 #else 3415 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3416 #endif 3417 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3418 B_neigs += B_neigs2; 3419 } 3420 break; 3421 case 2: 3422 if (scal) { 3423 bb[0] = PETSC_MIN_REAL; 3424 bb[1] = 0; 3425 #if defined(PETSC_USE_COMPLEX) 3426 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3427 #else 3428 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3429 #endif 3430 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3431 } else { 3432 PetscBLASInt B_neigs2 = 0; 3433 PetscBool import = PETSC_FALSE; 3434 3435 lthresh = PetscMax(lthresh,0.0); 3436 if (lthresh > 0.0) { 3437 bb[0] = PETSC_MIN_REAL; 3438 bb[1] = lthresh*lthresh; 3439 3440 import = PETSC_TRUE; 3441 #if defined(PETSC_USE_COMPLEX) 3442 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3443 #else 3444 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3445 #endif 3446 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3447 } 3448 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3449 bb[1] = PETSC_MAX_REAL; 3450 if (import) { 3451 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3452 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3453 } 3454 #if defined(PETSC_USE_COMPLEX) 3455 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3456 #else 3457 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3458 #endif 3459 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3460 B_neigs += B_neigs2; 3461 } 3462 break; 3463 case 3: 3464 if (scal) { 3465 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3466 } else { 3467 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3468 } 3469 if (!scal) { 3470 bb[0] = uthresh; 3471 bb[1] = PETSC_MAX_REAL; 3472 #if defined(PETSC_USE_COMPLEX) 3473 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3474 #else 3475 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3476 #endif 3477 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3478 } 3479 if (recipe_m > 0 && B_N - B_neigs > 0) { 3480 PetscBLASInt B_neigs2 = 0; 3481 3482 B_IL = 1; 3483 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3484 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3485 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3486 #if defined(PETSC_USE_COMPLEX) 3487 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3488 #else 3489 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3490 #endif 3491 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3492 B_neigs += B_neigs2; 3493 } 3494 break; 3495 case 4: 3496 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3497 #if defined(PETSC_USE_COMPLEX) 3498 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3499 #else 3500 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3501 #endif 3502 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3503 { 3504 PetscBLASInt B_neigs2 = 0; 3505 3506 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3507 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3508 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3509 #if defined(PETSC_USE_COMPLEX) 3510 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3511 #else 3512 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3513 #endif 3514 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3515 B_neigs += B_neigs2; 3516 } 3517 break; 3518 case 5: /* same as before: first compute all eigenvalues, then filter */ 3519 #if defined(PETSC_USE_COMPLEX) 3520 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3521 #else 3522 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3523 #endif 3524 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3525 { 3526 PetscInt e,k,ne; 3527 for (e=0,ne=0;e<B_neigs;e++) { 3528 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3529 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3530 eigs[ne] = eigs[e]; 3531 ne++; 3532 } 3533 } 3534 ierr = PetscArraycpy(eigv,S,B_N*ne);CHKERRQ(ierr); 3535 B_neigs = ne; 3536 } 3537 break; 3538 default: 3539 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3540 } 3541 } 3542 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3543 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3544 B_IL = 1; 3545 #if defined(PETSC_USE_COMPLEX) 3546 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3547 #else 3548 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3549 #endif 3550 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3551 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3552 PetscInt k; 3553 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3554 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3555 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3556 nmin = nmax; 3557 ierr = PetscArrayzero(eigv,subset_size*nmax);CHKERRQ(ierr); 3558 for (k=0;k<nmax;k++) { 3559 eigs[k] = 1./PETSC_SMALL; 3560 eigv[k*(subset_size+1)] = 1.0; 3561 } 3562 } 3563 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3564 if (B_ierr) { 3565 if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3566 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3567 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3568 } 3569 3570 if (B_neigs > nmax) { 3571 if (pcbddc->dbg_flag) { 3572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);CHKERRQ(ierr); 3573 } 3574 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3575 B_neigs = nmax; 3576 } 3577 3578 nmin_s = PetscMin(nmin,B_N); 3579 if (B_neigs < nmin_s) { 3580 PetscBLASInt B_neigs2 = 0; 3581 3582 if (pcbddc->use_deluxe_scaling) { 3583 if (scal) { 3584 B_IU = nmin_s; 3585 B_IL = B_neigs + 1; 3586 } else { 3587 B_IL = B_N - nmin_s + 1; 3588 B_IU = B_N - B_neigs; 3589 } 3590 } else { 3591 B_IL = B_neigs + 1; 3592 B_IU = nmin_s; 3593 } 3594 if (pcbddc->dbg_flag) { 3595 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);CHKERRQ(ierr); 3596 } 3597 if (sub_schurs->is_symmetric) { 3598 PetscInt j,k; 3599 for (j=0;j<subset_size;j++) { 3600 for (k=j;k<subset_size;k++) { 3601 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3602 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3603 } 3604 } 3605 } else { 3606 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3607 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3608 } 3609 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3610 #if defined(PETSC_USE_COMPLEX) 3611 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3612 #else 3613 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3614 #endif 3615 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3616 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3617 B_neigs += B_neigs2; 3618 } 3619 if (B_ierr) { 3620 if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3621 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3622 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3623 } 3624 if (pcbddc->dbg_flag) { 3625 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3626 for (j=0;j<B_neigs;j++) { 3627 if (eigs[j] == 0.0) { 3628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3629 } else { 3630 if (pcbddc->use_deluxe_scaling) { 3631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3632 } else { 3633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3634 } 3635 } 3636 } 3637 } 3638 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3639 } 3640 /* change the basis back to the original one */ 3641 if (sub_schurs->change) { 3642 Mat change,phi,phit; 3643 3644 if (pcbddc->dbg_flag > 2) { 3645 PetscInt ii; 3646 for (ii=0;ii<B_neigs;ii++) { 3647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3648 for (j=0;j<B_N;j++) { 3649 #if defined(PETSC_USE_COMPLEX) 3650 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3651 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3652 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3653 #else 3654 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3655 #endif 3656 } 3657 } 3658 } 3659 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3660 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3661 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3662 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3663 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3664 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3665 } 3666 maxneigs = PetscMax(B_neigs,maxneigs); 3667 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3668 if (B_neigs) { 3669 ierr = PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);CHKERRQ(ierr); 3670 3671 if (pcbddc->dbg_flag > 1) { 3672 PetscInt ii; 3673 for (ii=0;ii<B_neigs;ii++) { 3674 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3675 for (j=0;j<B_N;j++) { 3676 #if defined(PETSC_USE_COMPLEX) 3677 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3678 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3679 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3680 #else 3681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3682 #endif 3683 } 3684 } 3685 } 3686 ierr = PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);CHKERRQ(ierr); 3687 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3688 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3689 cum++; 3690 } 3691 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3692 /* shift for next computation */ 3693 cumarray += subset_size*subset_size; 3694 } 3695 if (pcbddc->dbg_flag) { 3696 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3697 } 3698 3699 if (mss) { 3700 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3701 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3702 /* destroy matrices (junk) */ 3703 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3704 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3705 } 3706 if (allocated_S_St) { 3707 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3708 } 3709 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3710 #if defined(PETSC_USE_COMPLEX) 3711 ierr = PetscFree(rwork);CHKERRQ(ierr); 3712 #endif 3713 if (pcbddc->dbg_flag) { 3714 PetscInt maxneigs_r; 3715 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3716 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);CHKERRQ(ierr); 3717 } 3718 ierr = PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3719 PetscFunctionReturn(0); 3720 } 3721 3722 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3723 { 3724 PetscScalar *coarse_submat_vals; 3725 PetscErrorCode ierr; 3726 3727 PetscFunctionBegin; 3728 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3729 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3730 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3731 3732 /* Setup local neumann solver ksp_R */ 3733 /* PCBDDCSetUpLocalScatters should be called first! */ 3734 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3735 3736 /* 3737 Setup local correction and local part of coarse basis. 3738 Gives back the dense local part of the coarse matrix in column major ordering 3739 */ 3740 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3741 3742 /* Compute total number of coarse nodes and setup coarse solver */ 3743 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3744 3745 /* free */ 3746 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3747 PetscFunctionReturn(0); 3748 } 3749 3750 PetscErrorCode PCBDDCResetCustomization(PC pc) 3751 { 3752 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3753 PetscErrorCode ierr; 3754 3755 PetscFunctionBegin; 3756 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3757 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3758 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3759 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3760 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3761 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3762 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3763 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3764 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3765 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3766 PetscFunctionReturn(0); 3767 } 3768 3769 PetscErrorCode PCBDDCResetTopography(PC pc) 3770 { 3771 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3772 PetscInt i; 3773 PetscErrorCode ierr; 3774 3775 PetscFunctionBegin; 3776 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3777 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3778 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3779 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3780 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3781 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3782 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3783 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3784 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3785 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3786 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3787 for (i=0;i<pcbddc->n_local_subs;i++) { 3788 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3789 } 3790 pcbddc->n_local_subs = 0; 3791 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3792 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3793 pcbddc->graphanalyzed = PETSC_FALSE; 3794 pcbddc->recompute_topography = PETSC_TRUE; 3795 pcbddc->corner_selected = PETSC_FALSE; 3796 PetscFunctionReturn(0); 3797 } 3798 3799 PetscErrorCode PCBDDCResetSolvers(PC pc) 3800 { 3801 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3802 PetscErrorCode ierr; 3803 3804 PetscFunctionBegin; 3805 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3806 if (pcbddc->coarse_phi_B) { 3807 PetscScalar *array; 3808 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3809 ierr = PetscFree(array);CHKERRQ(ierr); 3810 } 3811 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3812 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3813 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3814 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3815 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3816 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3817 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3818 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3819 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3820 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3821 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3822 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3823 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3824 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3825 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3826 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3827 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3828 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3829 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3830 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3831 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3832 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3833 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3834 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3835 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3836 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3837 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3838 if (pcbddc->benign_zerodiag_subs) { 3839 PetscInt i; 3840 for (i=0;i<pcbddc->benign_n;i++) { 3841 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3842 } 3843 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3844 } 3845 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3846 PetscFunctionReturn(0); 3847 } 3848 3849 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3850 { 3851 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3852 PC_IS *pcis = (PC_IS*)pc->data; 3853 VecType impVecType; 3854 PetscInt n_constraints,n_R,old_size; 3855 PetscErrorCode ierr; 3856 3857 PetscFunctionBegin; 3858 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3859 n_R = pcis->n - pcbddc->n_vertices; 3860 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3861 /* local work vectors (try to avoid unneeded work)*/ 3862 /* R nodes */ 3863 old_size = -1; 3864 if (pcbddc->vec1_R) { 3865 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3866 } 3867 if (n_R != old_size) { 3868 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3869 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3870 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3871 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3872 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3873 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3874 } 3875 /* local primal dofs */ 3876 old_size = -1; 3877 if (pcbddc->vec1_P) { 3878 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3879 } 3880 if (pcbddc->local_primal_size != old_size) { 3881 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3882 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3883 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3884 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3885 } 3886 /* local explicit constraints */ 3887 old_size = -1; 3888 if (pcbddc->vec1_C) { 3889 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3890 } 3891 if (n_constraints && n_constraints != old_size) { 3892 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3893 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3894 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3895 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3896 } 3897 PetscFunctionReturn(0); 3898 } 3899 3900 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3901 { 3902 PetscErrorCode ierr; 3903 /* pointers to pcis and pcbddc */ 3904 PC_IS* pcis = (PC_IS*)pc->data; 3905 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3906 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3907 /* submatrices of local problem */ 3908 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3909 /* submatrices of local coarse problem */ 3910 Mat S_VV,S_CV,S_VC,S_CC; 3911 /* working matrices */ 3912 Mat C_CR; 3913 /* additional working stuff */ 3914 PC pc_R; 3915 Mat F,Brhs = NULL; 3916 Vec dummy_vec; 3917 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3918 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3919 PetscScalar *work; 3920 PetscInt *idx_V_B; 3921 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3922 PetscInt i,n_R,n_D,n_B; 3923 PetscScalar one=1.0,m_one=-1.0; 3924 3925 PetscFunctionBegin; 3926 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3927 ierr = PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3928 3929 /* Set Non-overlapping dimensions */ 3930 n_vertices = pcbddc->n_vertices; 3931 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3932 n_B = pcis->n_B; 3933 n_D = pcis->n - n_B; 3934 n_R = pcis->n - n_vertices; 3935 3936 /* vertices in boundary numbering */ 3937 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3938 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3939 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3940 3941 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3942 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3943 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3944 ierr = MatDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3945 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3946 ierr = MatDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3947 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3948 ierr = MatDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3949 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3950 ierr = MatDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3951 3952 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3953 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3954 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3955 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3956 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3957 lda_rhs = n_R; 3958 need_benign_correction = PETSC_FALSE; 3959 if (isLU || isCHOL) { 3960 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3961 } else if (sub_schurs && sub_schurs->reuse_solver) { 3962 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3963 MatFactorType type; 3964 3965 F = reuse_solver->F; 3966 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3967 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3968 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3969 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3970 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3971 } else F = NULL; 3972 3973 /* determine if we can use a sparse right-hand side */ 3974 sparserhs = PETSC_FALSE; 3975 if (F) { 3976 MatSolverType solver; 3977 3978 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3979 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3980 } 3981 3982 /* allocate workspace */ 3983 n = 0; 3984 if (n_constraints) { 3985 n += lda_rhs*n_constraints; 3986 } 3987 if (n_vertices) { 3988 n = PetscMax(2*lda_rhs*n_vertices,n); 3989 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3990 } 3991 if (!pcbddc->symmetric_primal) { 3992 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3993 } 3994 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3995 3996 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3997 dummy_vec = NULL; 3998 if (need_benign_correction && lda_rhs != n_R && F) { 3999 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);CHKERRQ(ierr); 4000 ierr = VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);CHKERRQ(ierr); 4001 ierr = VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);CHKERRQ(ierr); 4002 } 4003 4004 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 4005 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 4006 4007 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4008 if (n_constraints) { 4009 Mat M3,C_B; 4010 IS is_aux; 4011 PetscScalar *array,*array2; 4012 4013 /* Extract constraints on R nodes: C_{CR} */ 4014 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 4015 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 4016 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4017 4018 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4019 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4020 if (!sparserhs) { 4021 ierr = PetscArrayzero(work,lda_rhs*n_constraints);CHKERRQ(ierr); 4022 for (i=0;i<n_constraints;i++) { 4023 const PetscScalar *row_cmat_values; 4024 const PetscInt *row_cmat_indices; 4025 PetscInt size_of_constraint,j; 4026 4027 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4028 for (j=0;j<size_of_constraint;j++) { 4029 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4030 } 4031 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4032 } 4033 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 4034 } else { 4035 Mat tC_CR; 4036 4037 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4038 if (lda_rhs != n_R) { 4039 PetscScalar *aa; 4040 PetscInt r,*ii,*jj; 4041 PetscBool done; 4042 4043 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4044 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4045 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 4046 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 4047 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4048 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4049 } else { 4050 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 4051 tC_CR = C_CR; 4052 } 4053 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 4054 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 4055 } 4056 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 4057 if (F) { 4058 if (need_benign_correction) { 4059 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4060 4061 /* rhs is already zero on interior dofs, no need to change the rhs */ 4062 ierr = PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);CHKERRQ(ierr); 4063 } 4064 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 4065 if (need_benign_correction) { 4066 PetscScalar *marr; 4067 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4068 4069 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4070 if (lda_rhs != n_R) { 4071 for (i=0;i<n_constraints;i++) { 4072 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4073 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4074 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4075 } 4076 } else { 4077 for (i=0;i<n_constraints;i++) { 4078 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4079 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4080 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4081 } 4082 } 4083 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4084 } 4085 } else { 4086 PetscScalar *marr; 4087 4088 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4089 for (i=0;i<n_constraints;i++) { 4090 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4091 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 4092 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4093 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4094 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4095 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4096 } 4097 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4098 } 4099 if (sparserhs) { 4100 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4101 } 4102 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4103 if (!pcbddc->switch_static) { 4104 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4105 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4106 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4107 for (i=0;i<n_constraints;i++) { 4108 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 4109 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 4110 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4111 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4112 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4113 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4114 } 4115 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4116 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4117 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4118 } else { 4119 if (lda_rhs != n_R) { 4120 IS dummy; 4121 4122 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 4123 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4124 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 4125 } else { 4126 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 4127 pcbddc->local_auxmat2 = local_auxmat2_R; 4128 } 4129 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4130 } 4131 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4132 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4133 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4134 if (isCHOL) { 4135 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4136 } else { 4137 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4138 } 4139 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4140 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4141 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4142 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4143 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4144 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4145 } 4146 4147 /* Get submatrices from subdomain matrix */ 4148 if (n_vertices) { 4149 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4150 PetscBool oldpin; 4151 #endif 4152 PetscBool isaij; 4153 IS is_aux; 4154 4155 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4156 IS tis; 4157 4158 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4159 ierr = ISSort(tis);CHKERRQ(ierr); 4160 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4161 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4162 } else { 4163 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4164 } 4165 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4166 oldpin = pcbddc->local_mat->boundtocpu; 4167 #endif 4168 ierr = MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);CHKERRQ(ierr); 4169 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4170 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4171 ierr = PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);CHKERRQ(ierr); 4172 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4173 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4174 } 4175 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4176 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4177 ierr = MatBindToCPU(pcbddc->local_mat,oldpin);CHKERRQ(ierr); 4178 #endif 4179 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4180 } 4181 4182 /* Matrix of coarse basis functions (local) */ 4183 if (pcbddc->coarse_phi_B) { 4184 PetscInt on_B,on_primal,on_D=n_D; 4185 if (pcbddc->coarse_phi_D) { 4186 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4187 } 4188 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4189 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4190 PetscScalar *marray; 4191 4192 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4193 ierr = PetscFree(marray);CHKERRQ(ierr); 4194 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4195 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4196 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4197 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4198 } 4199 } 4200 4201 if (!pcbddc->coarse_phi_B) { 4202 PetscScalar *marr; 4203 4204 /* memory size */ 4205 n = n_B*pcbddc->local_primal_size; 4206 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4207 if (!pcbddc->symmetric_primal) n *= 2; 4208 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4209 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4210 marr += n_B*pcbddc->local_primal_size; 4211 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4212 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4213 marr += n_D*pcbddc->local_primal_size; 4214 } 4215 if (!pcbddc->symmetric_primal) { 4216 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4217 marr += n_B*pcbddc->local_primal_size; 4218 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4219 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4220 } 4221 } else { 4222 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4223 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4224 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4225 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4226 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4227 } 4228 } 4229 } 4230 4231 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4232 p0_lidx_I = NULL; 4233 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4234 const PetscInt *idxs; 4235 4236 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4237 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4238 for (i=0;i<pcbddc->benign_n;i++) { 4239 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4240 } 4241 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4242 } 4243 4244 /* vertices */ 4245 if (n_vertices) { 4246 PetscBool restoreavr = PETSC_FALSE; 4247 4248 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4249 4250 if (n_R) { 4251 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4252 PetscBLASInt B_N,B_one = 1; 4253 const PetscScalar *x; 4254 PetscScalar *y; 4255 4256 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4257 if (need_benign_correction) { 4258 ISLocalToGlobalMapping RtoN; 4259 IS is_p0; 4260 PetscInt *idxs_p0,n; 4261 4262 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4263 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4264 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4265 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4266 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4267 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4268 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4269 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4270 } 4271 4272 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4273 if (!sparserhs || need_benign_correction) { 4274 if (lda_rhs == n_R) { 4275 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4276 } else { 4277 PetscScalar *av,*array; 4278 const PetscInt *xadj,*adjncy; 4279 PetscInt n; 4280 PetscBool flg_row; 4281 4282 array = work+lda_rhs*n_vertices; 4283 ierr = PetscArrayzero(array,lda_rhs*n_vertices);CHKERRQ(ierr); 4284 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4285 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4286 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4287 for (i=0;i<n;i++) { 4288 PetscInt j; 4289 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4290 } 4291 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4292 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4293 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4294 } 4295 if (need_benign_correction) { 4296 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4297 PetscScalar *marr; 4298 4299 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4300 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4301 4302 | 0 0 0 | (V) 4303 L = | 0 0 -1 | (P-p0) 4304 | 0 0 -1 | (p0) 4305 4306 */ 4307 for (i=0;i<reuse_solver->benign_n;i++) { 4308 const PetscScalar *vals; 4309 const PetscInt *idxs,*idxs_zero; 4310 PetscInt n,j,nz; 4311 4312 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4313 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4314 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4315 for (j=0;j<n;j++) { 4316 PetscScalar val = vals[j]; 4317 PetscInt k,col = idxs[j]; 4318 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4319 } 4320 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4321 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4322 } 4323 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4324 } 4325 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4326 Brhs = A_RV; 4327 } else { 4328 Mat tA_RVT,A_RVT; 4329 4330 if (!pcbddc->symmetric_primal) { 4331 /* A_RV already scaled by -1 */ 4332 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4333 } else { 4334 restoreavr = PETSC_TRUE; 4335 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4336 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4337 A_RVT = A_VR; 4338 } 4339 if (lda_rhs != n_R) { 4340 PetscScalar *aa; 4341 PetscInt r,*ii,*jj; 4342 PetscBool done; 4343 4344 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4345 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4346 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4347 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4348 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4349 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4350 } else { 4351 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4352 tA_RVT = A_RVT; 4353 } 4354 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4355 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4356 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4357 } 4358 if (F) { 4359 /* need to correct the rhs */ 4360 if (need_benign_correction) { 4361 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4362 PetscScalar *marr; 4363 4364 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4365 if (lda_rhs != n_R) { 4366 for (i=0;i<n_vertices;i++) { 4367 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4368 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4369 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4370 } 4371 } else { 4372 for (i=0;i<n_vertices;i++) { 4373 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4374 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4375 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4376 } 4377 } 4378 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4379 } 4380 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4381 if (restoreavr) { 4382 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4383 } 4384 /* need to correct the solution */ 4385 if (need_benign_correction) { 4386 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4387 PetscScalar *marr; 4388 4389 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4390 if (lda_rhs != n_R) { 4391 for (i=0;i<n_vertices;i++) { 4392 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4393 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4394 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4395 } 4396 } else { 4397 for (i=0;i<n_vertices;i++) { 4398 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4399 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4400 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4401 } 4402 } 4403 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4404 } 4405 } else { 4406 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4407 for (i=0;i<n_vertices;i++) { 4408 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4409 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4410 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4411 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4412 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4413 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4414 } 4415 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4416 } 4417 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4418 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4419 /* S_VV and S_CV */ 4420 if (n_constraints) { 4421 Mat B; 4422 4423 ierr = PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);CHKERRQ(ierr); 4424 for (i=0;i<n_vertices;i++) { 4425 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4426 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4427 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4428 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4429 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4430 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4431 } 4432 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4433 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4434 ierr = MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV);CHKERRQ(ierr); 4435 ierr = MatProductSetType(S_CV,MATPRODUCT_AB);CHKERRQ(ierr); 4436 ierr = MatProductSetFromOptions(S_CV);CHKERRQ(ierr); 4437 ierr = MatProductSymbolic(S_CV);CHKERRQ(ierr); 4438 ierr = MatProductNumeric(S_CV);CHKERRQ(ierr); 4439 ierr = MatProductClear(S_CV);CHKERRQ(ierr); 4440 4441 ierr = MatDestroy(&B);CHKERRQ(ierr); 4442 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4443 /* Reuse B = local_auxmat2_R * S_CV */ 4444 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B);CHKERRQ(ierr); 4445 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4446 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4447 ierr = MatProductSymbolic(B);CHKERRQ(ierr); 4448 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4449 4450 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4451 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4452 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4453 ierr = MatDestroy(&B);CHKERRQ(ierr); 4454 } 4455 if (lda_rhs != n_R) { 4456 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4457 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4458 ierr = MatDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4459 } 4460 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4461 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4462 if (need_benign_correction) { 4463 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4464 PetscScalar *marr,*sums; 4465 4466 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4467 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4468 for (i=0;i<reuse_solver->benign_n;i++) { 4469 const PetscScalar *vals; 4470 const PetscInt *idxs,*idxs_zero; 4471 PetscInt n,j,nz; 4472 4473 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4474 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4475 for (j=0;j<n_vertices;j++) { 4476 PetscInt k; 4477 sums[j] = 0.; 4478 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4479 } 4480 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4481 for (j=0;j<n;j++) { 4482 PetscScalar val = vals[j]; 4483 PetscInt k; 4484 for (k=0;k<n_vertices;k++) { 4485 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4486 } 4487 } 4488 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4489 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4490 } 4491 ierr = PetscFree(sums);CHKERRQ(ierr); 4492 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4493 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4494 } 4495 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4496 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4497 ierr = MatDenseGetArrayRead(A_VV,&x);CHKERRQ(ierr); 4498 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4499 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4500 ierr = MatDenseRestoreArrayRead(A_VV,&x);CHKERRQ(ierr); 4501 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4502 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4503 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4504 } else { 4505 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4506 } 4507 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4508 4509 /* coarse basis functions */ 4510 for (i=0;i<n_vertices;i++) { 4511 PetscScalar *y; 4512 4513 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4514 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4515 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4516 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4517 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4518 y[n_B*i+idx_V_B[i]] = 1.0; 4519 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4520 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4521 4522 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4523 PetscInt j; 4524 4525 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4526 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4527 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4528 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4529 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4530 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4531 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4532 } 4533 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4534 } 4535 /* if n_R == 0 the object is not destroyed */ 4536 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4537 } 4538 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4539 4540 if (n_constraints) { 4541 Mat B; 4542 4543 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4544 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4545 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B);CHKERRQ(ierr); 4546 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4547 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4548 ierr = MatProductSymbolic(B);CHKERRQ(ierr); 4549 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4550 4551 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4552 if (n_vertices) { 4553 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4554 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4555 } else { 4556 Mat S_VCt; 4557 4558 if (lda_rhs != n_R) { 4559 ierr = MatDestroy(&B);CHKERRQ(ierr); 4560 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4561 ierr = MatDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4562 } 4563 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4564 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4565 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4566 } 4567 } 4568 ierr = MatDestroy(&B);CHKERRQ(ierr); 4569 /* coarse basis functions */ 4570 for (i=0;i<n_constraints;i++) { 4571 PetscScalar *y; 4572 4573 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4574 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4575 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4576 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4577 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4578 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4579 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4580 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4581 PetscInt j; 4582 4583 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4584 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4585 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4586 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4587 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4588 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4589 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4590 } 4591 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4592 } 4593 } 4594 if (n_constraints) { 4595 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4596 } 4597 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4598 4599 /* coarse matrix entries relative to B_0 */ 4600 if (pcbddc->benign_n) { 4601 Mat B0_B,B0_BPHI; 4602 IS is_dummy; 4603 const PetscScalar *data; 4604 PetscInt j; 4605 4606 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4607 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4608 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4609 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4610 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4611 ierr = MatDenseGetArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4612 for (j=0;j<pcbddc->benign_n;j++) { 4613 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4614 for (i=0;i<pcbddc->local_primal_size;i++) { 4615 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4616 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4617 } 4618 } 4619 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4620 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4621 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4622 } 4623 4624 /* compute other basis functions for non-symmetric problems */ 4625 if (!pcbddc->symmetric_primal) { 4626 Mat B_V=NULL,B_C=NULL; 4627 PetscScalar *marray; 4628 4629 if (n_constraints) { 4630 Mat S_CCT,C_CRT; 4631 4632 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4633 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4634 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4635 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4636 if (n_vertices) { 4637 Mat S_VCT; 4638 4639 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4640 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4641 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4642 } 4643 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4644 } else { 4645 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4646 } 4647 if (n_vertices && n_R) { 4648 PetscScalar *av,*marray; 4649 const PetscInt *xadj,*adjncy; 4650 PetscInt n; 4651 PetscBool flg_row; 4652 4653 /* B_V = B_V - A_VR^T */ 4654 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4655 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4656 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4657 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4658 for (i=0;i<n;i++) { 4659 PetscInt j; 4660 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4661 } 4662 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4663 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4664 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4665 } 4666 4667 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4668 if (n_vertices) { 4669 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4670 for (i=0;i<n_vertices;i++) { 4671 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4672 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4673 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4674 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4675 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4676 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4677 } 4678 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4679 } 4680 if (B_C) { 4681 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4682 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4683 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4684 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4685 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4686 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4687 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4688 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4689 } 4690 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4691 } 4692 /* coarse basis functions */ 4693 for (i=0;i<pcbddc->local_primal_size;i++) { 4694 PetscScalar *y; 4695 4696 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4697 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4698 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4699 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4700 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4701 if (i<n_vertices) { 4702 y[n_B*i+idx_V_B[i]] = 1.0; 4703 } 4704 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4705 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4706 4707 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4708 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4709 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4710 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4711 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4712 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4713 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4714 } 4715 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4716 } 4717 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4718 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4719 } 4720 4721 /* free memory */ 4722 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4723 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4724 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4725 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4726 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4727 ierr = PetscFree(work);CHKERRQ(ierr); 4728 if (n_vertices) { 4729 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4730 } 4731 if (n_constraints) { 4732 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4733 } 4734 ierr = PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 4735 4736 /* Checking coarse_sub_mat and coarse basis functios */ 4737 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4738 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4739 if (pcbddc->dbg_flag) { 4740 Mat coarse_sub_mat; 4741 Mat AUXMAT,TM1,TM2,TM3,TM4; 4742 Mat coarse_phi_D,coarse_phi_B; 4743 Mat coarse_psi_D,coarse_psi_B; 4744 Mat A_II,A_BB,A_IB,A_BI; 4745 Mat C_B,CPHI; 4746 IS is_dummy; 4747 Vec mones; 4748 MatType checkmattype=MATSEQAIJ; 4749 PetscReal real_value; 4750 4751 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4752 Mat A; 4753 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4754 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4755 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4756 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4757 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4758 ierr = MatDestroy(&A);CHKERRQ(ierr); 4759 } else { 4760 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4761 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4762 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4763 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4764 } 4765 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4766 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4767 if (!pcbddc->symmetric_primal) { 4768 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4769 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4770 } 4771 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4772 4773 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4774 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4775 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4776 if (!pcbddc->symmetric_primal) { 4777 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4778 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4779 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4780 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4781 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4782 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4783 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4784 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4785 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4786 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4787 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4788 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4789 } else { 4790 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4791 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4792 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4793 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4794 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4795 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4796 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4797 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4798 } 4799 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4800 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4801 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4802 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4803 if (pcbddc->benign_n) { 4804 Mat B0_B,B0_BPHI; 4805 const PetscScalar *data2; 4806 PetscScalar *data; 4807 PetscInt j; 4808 4809 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4810 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4811 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4812 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4813 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4814 ierr = MatDenseGetArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4815 for (j=0;j<pcbddc->benign_n;j++) { 4816 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4817 for (i=0;i<pcbddc->local_primal_size;i++) { 4818 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4819 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4820 } 4821 } 4822 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4823 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4824 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4825 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4826 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4827 } 4828 #if 0 4829 { 4830 PetscViewer viewer; 4831 char filename[256]; 4832 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4833 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4834 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4835 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4836 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4837 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4838 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4839 if (pcbddc->coarse_phi_B) { 4840 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4841 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4842 } 4843 if (pcbddc->coarse_phi_D) { 4844 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4845 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4846 } 4847 if (pcbddc->coarse_psi_B) { 4848 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4849 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4850 } 4851 if (pcbddc->coarse_psi_D) { 4852 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4853 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4854 } 4855 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4856 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4857 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4858 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4859 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4860 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4861 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4862 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4863 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4864 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4865 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4866 } 4867 #endif 4868 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4869 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4870 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4871 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4872 4873 /* check constraints */ 4874 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4875 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4876 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4877 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4878 } else { 4879 PetscScalar *data; 4880 Mat tmat; 4881 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4882 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4883 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4884 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4885 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4886 } 4887 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4888 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4889 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4890 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4891 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4892 if (!pcbddc->symmetric_primal) { 4893 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4894 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4895 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4896 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4897 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4898 } 4899 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4900 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4901 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4902 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4903 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4904 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4905 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4906 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4907 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4908 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4909 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4910 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4911 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4912 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4913 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4914 if (!pcbddc->symmetric_primal) { 4915 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4916 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4917 } 4918 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4919 } 4920 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4921 { 4922 PetscBool gpu; 4923 4924 ierr = PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);CHKERRQ(ierr); 4925 if (gpu) { 4926 if (pcbddc->local_auxmat1) { 4927 ierr = MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4928 } 4929 if (pcbddc->local_auxmat2) { 4930 ierr = MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4931 } 4932 if (pcbddc->coarse_phi_B) { 4933 ierr = MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4934 } 4935 if (pcbddc->coarse_phi_D) { 4936 ierr = MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4937 } 4938 if (pcbddc->coarse_psi_B) { 4939 ierr = MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4940 } 4941 if (pcbddc->coarse_psi_D) { 4942 ierr = MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4943 } 4944 } 4945 } 4946 /* get back data */ 4947 *coarse_submat_vals_n = coarse_submat_vals; 4948 PetscFunctionReturn(0); 4949 } 4950 4951 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4952 { 4953 Mat *work_mat; 4954 IS isrow_s,iscol_s; 4955 PetscBool rsorted,csorted; 4956 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4957 PetscErrorCode ierr; 4958 4959 PetscFunctionBegin; 4960 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4961 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4962 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4963 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4964 4965 if (!rsorted) { 4966 const PetscInt *idxs; 4967 PetscInt *idxs_sorted,i; 4968 4969 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4970 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4971 for (i=0;i<rsize;i++) { 4972 idxs_perm_r[i] = i; 4973 } 4974 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4975 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4976 for (i=0;i<rsize;i++) { 4977 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4978 } 4979 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4980 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4981 } else { 4982 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4983 isrow_s = isrow; 4984 } 4985 4986 if (!csorted) { 4987 if (isrow == iscol) { 4988 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4989 iscol_s = isrow_s; 4990 } else { 4991 const PetscInt *idxs; 4992 PetscInt *idxs_sorted,i; 4993 4994 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4995 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4996 for (i=0;i<csize;i++) { 4997 idxs_perm_c[i] = i; 4998 } 4999 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 5000 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 5001 for (i=0;i<csize;i++) { 5002 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5003 } 5004 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 5005 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 5006 } 5007 } else { 5008 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 5009 iscol_s = iscol; 5010 } 5011 5012 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5013 5014 if (!rsorted || !csorted) { 5015 Mat new_mat; 5016 IS is_perm_r,is_perm_c; 5017 5018 if (!rsorted) { 5019 PetscInt *idxs_r,i; 5020 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 5021 for (i=0;i<rsize;i++) { 5022 idxs_r[idxs_perm_r[i]] = i; 5023 } 5024 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 5025 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 5026 } else { 5027 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 5028 } 5029 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 5030 5031 if (!csorted) { 5032 if (isrow_s == iscol_s) { 5033 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 5034 is_perm_c = is_perm_r; 5035 } else { 5036 PetscInt *idxs_c,i; 5037 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5038 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 5039 for (i=0;i<csize;i++) { 5040 idxs_c[idxs_perm_c[i]] = i; 5041 } 5042 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 5043 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 5044 } 5045 } else { 5046 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 5047 } 5048 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 5049 5050 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 5051 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 5052 work_mat[0] = new_mat; 5053 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 5054 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 5055 } 5056 5057 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 5058 *B = work_mat[0]; 5059 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 5060 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 5061 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 5062 PetscFunctionReturn(0); 5063 } 5064 5065 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5066 { 5067 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5068 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5069 Mat new_mat,lA; 5070 IS is_local,is_global; 5071 PetscInt local_size; 5072 PetscBool isseqaij; 5073 PetscErrorCode ierr; 5074 5075 PetscFunctionBegin; 5076 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5077 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 5078 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 5079 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 5080 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 5081 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 5082 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5083 5084 if (pcbddc->dbg_flag) { 5085 Vec x,x_change; 5086 PetscReal error; 5087 5088 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 5089 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5090 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 5091 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5092 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5093 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 5094 if (!pcbddc->change_interior) { 5095 const PetscScalar *x,*y,*v; 5096 PetscReal lerror = 0.; 5097 PetscInt i; 5098 5099 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 5100 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 5101 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 5102 for (i=0;i<local_size;i++) 5103 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5104 lerror = PetscAbsScalar(x[i]-y[i]); 5105 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 5106 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 5107 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 5108 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5109 if (error > PETSC_SMALL) { 5110 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5111 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 5112 } else { 5113 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 5114 } 5115 } 5116 } 5117 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5118 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5119 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5120 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5121 if (error > PETSC_SMALL) { 5122 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5123 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 5124 } else { 5125 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 5126 } 5127 } 5128 ierr = VecDestroy(&x);CHKERRQ(ierr); 5129 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5130 } 5131 5132 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5133 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 5134 5135 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5136 ierr = PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5137 if (isseqaij) { 5138 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5139 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5140 if (lA) { 5141 Mat work; 5142 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5143 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5144 ierr = MatDestroy(&work);CHKERRQ(ierr); 5145 } 5146 } else { 5147 Mat work_mat; 5148 5149 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5150 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5151 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5152 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 5153 if (lA) { 5154 Mat work; 5155 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5156 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5157 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5158 ierr = MatDestroy(&work);CHKERRQ(ierr); 5159 } 5160 } 5161 if (matis->A->symmetric_set) { 5162 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 5163 #if !defined(PETSC_USE_COMPLEX) 5164 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 5165 #endif 5166 } 5167 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 5168 PetscFunctionReturn(0); 5169 } 5170 5171 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5172 { 5173 PC_IS* pcis = (PC_IS*)(pc->data); 5174 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5175 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5176 PetscInt *idx_R_local=NULL; 5177 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5178 PetscInt vbs,bs; 5179 PetscBT bitmask=NULL; 5180 PetscErrorCode ierr; 5181 5182 PetscFunctionBegin; 5183 /* 5184 No need to setup local scatters if 5185 - primal space is unchanged 5186 AND 5187 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5188 AND 5189 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5190 */ 5191 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5192 PetscFunctionReturn(0); 5193 } 5194 /* destroy old objects */ 5195 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5196 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5197 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5198 /* Set Non-overlapping dimensions */ 5199 n_B = pcis->n_B; 5200 n_D = pcis->n - n_B; 5201 n_vertices = pcbddc->n_vertices; 5202 5203 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5204 5205 /* create auxiliary bitmask and allocate workspace */ 5206 if (!sub_schurs || !sub_schurs->reuse_solver) { 5207 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5208 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5209 for (i=0;i<n_vertices;i++) { 5210 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5211 } 5212 5213 for (i=0, n_R=0; i<pcis->n; i++) { 5214 if (!PetscBTLookup(bitmask,i)) { 5215 idx_R_local[n_R++] = i; 5216 } 5217 } 5218 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5219 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5220 5221 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5222 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5223 } 5224 5225 /* Block code */ 5226 vbs = 1; 5227 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5228 if (bs>1 && !(n_vertices%bs)) { 5229 PetscBool is_blocked = PETSC_TRUE; 5230 PetscInt *vary; 5231 if (!sub_schurs || !sub_schurs->reuse_solver) { 5232 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5233 ierr = PetscArrayzero(vary,pcis->n/bs);CHKERRQ(ierr); 5234 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5235 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5236 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5237 for (i=0; i<pcis->n/bs; i++) { 5238 if (vary[i]!=0 && vary[i]!=bs) { 5239 is_blocked = PETSC_FALSE; 5240 break; 5241 } 5242 } 5243 ierr = PetscFree(vary);CHKERRQ(ierr); 5244 } else { 5245 /* Verify directly the R set */ 5246 for (i=0; i<n_R/bs; i++) { 5247 PetscInt j,node=idx_R_local[bs*i]; 5248 for (j=1; j<bs; j++) { 5249 if (node != idx_R_local[bs*i+j]-j) { 5250 is_blocked = PETSC_FALSE; 5251 break; 5252 } 5253 } 5254 } 5255 } 5256 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5257 vbs = bs; 5258 for (i=0;i<n_R/vbs;i++) { 5259 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5260 } 5261 } 5262 } 5263 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5264 if (sub_schurs && sub_schurs->reuse_solver) { 5265 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5266 5267 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5268 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5269 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5270 reuse_solver->is_R = pcbddc->is_R_local; 5271 } else { 5272 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5273 } 5274 5275 /* print some info if requested */ 5276 if (pcbddc->dbg_flag) { 5277 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5278 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5279 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5282 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5283 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5284 } 5285 5286 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5287 if (!sub_schurs || !sub_schurs->reuse_solver) { 5288 IS is_aux1,is_aux2; 5289 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5290 5291 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5292 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5293 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5294 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5295 for (i=0; i<n_D; i++) { 5296 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5297 } 5298 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5299 for (i=0, j=0; i<n_R; i++) { 5300 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5301 aux_array1[j++] = i; 5302 } 5303 } 5304 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5305 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5306 for (i=0, j=0; i<n_B; i++) { 5307 if (!PetscBTLookup(bitmask,is_indices[i])) { 5308 aux_array2[j++] = i; 5309 } 5310 } 5311 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5312 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5313 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5314 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5315 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5316 5317 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5318 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5319 for (i=0, j=0; i<n_R; i++) { 5320 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5321 aux_array1[j++] = i; 5322 } 5323 } 5324 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5325 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5326 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5327 } 5328 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5329 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5330 } else { 5331 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5332 IS tis; 5333 PetscInt schur_size; 5334 5335 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5336 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5337 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5338 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5339 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5340 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5341 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5342 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5343 } 5344 } 5345 PetscFunctionReturn(0); 5346 } 5347 5348 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5349 { 5350 MatNullSpace NullSpace; 5351 Mat dmat; 5352 const Vec *nullvecs; 5353 Vec v,v2,*nullvecs2; 5354 VecScatter sct = NULL; 5355 PetscContainer c; 5356 PetscScalar *ddata; 5357 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5358 PetscBool nnsp_has_cnst; 5359 PetscErrorCode ierr; 5360 5361 PetscFunctionBegin; 5362 if (!is && !B) { /* MATIS */ 5363 Mat_IS* matis = (Mat_IS*)A->data; 5364 5365 if (!B) { 5366 ierr = MatISGetLocalMat(A,&B);CHKERRQ(ierr); 5367 } 5368 sct = matis->cctx; 5369 ierr = PetscObjectReference((PetscObject)sct);CHKERRQ(ierr); 5370 } else { 5371 ierr = MatGetNullSpace(B,&NullSpace);CHKERRQ(ierr); 5372 if (!NullSpace) { 5373 ierr = MatGetNearNullSpace(B,&NullSpace);CHKERRQ(ierr); 5374 } 5375 if (NullSpace) PetscFunctionReturn(0); 5376 } 5377 ierr = MatGetNullSpace(A,&NullSpace);CHKERRQ(ierr); 5378 if (!NullSpace) { 5379 ierr = MatGetNearNullSpace(A,&NullSpace);CHKERRQ(ierr); 5380 } 5381 if (!NullSpace) PetscFunctionReturn(0); 5382 5383 ierr = MatCreateVecs(A,&v,NULL);CHKERRQ(ierr); 5384 ierr = MatCreateVecs(B,&v2,NULL);CHKERRQ(ierr); 5385 if (!sct) { 5386 ierr = VecScatterCreate(v,is,v2,NULL,&sct);CHKERRQ(ierr); 5387 } 5388 ierr = MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);CHKERRQ(ierr); 5389 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5390 ierr = PetscMalloc1(bsiz,&nullvecs2);CHKERRQ(ierr); 5391 ierr = VecGetBlockSize(v2,&bs);CHKERRQ(ierr); 5392 ierr = VecGetSize(v2,&N);CHKERRQ(ierr); 5393 ierr = VecGetLocalSize(v2,&n);CHKERRQ(ierr); 5394 ierr = PetscMalloc1(n*bsiz,&ddata);CHKERRQ(ierr); 5395 for (k=0;k<nnsp_size;k++) { 5396 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);CHKERRQ(ierr); 5397 ierr = VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5398 ierr = VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5399 } 5400 if (nnsp_has_cnst) { 5401 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);CHKERRQ(ierr); 5402 ierr = VecSet(nullvecs2[nnsp_size],1.0);CHKERRQ(ierr); 5403 } 5404 ierr = PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);CHKERRQ(ierr); 5405 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);CHKERRQ(ierr); 5406 5407 ierr = MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);CHKERRQ(ierr); 5408 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);CHKERRQ(ierr); 5409 ierr = PetscContainerSetPointer(c,ddata);CHKERRQ(ierr); 5410 ierr = PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);CHKERRQ(ierr); 5411 ierr = PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);CHKERRQ(ierr); 5412 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 5413 ierr = PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);CHKERRQ(ierr); 5414 ierr = MatDestroy(&dmat);CHKERRQ(ierr); 5415 5416 for (k=0;k<bsiz;k++) { 5417 ierr = VecDestroy(&nullvecs2[k]);CHKERRQ(ierr); 5418 } 5419 ierr = PetscFree(nullvecs2);CHKERRQ(ierr); 5420 ierr = MatSetNearNullSpace(B,NullSpace);CHKERRQ(ierr); 5421 ierr = MatNullSpaceDestroy(&NullSpace);CHKERRQ(ierr); 5422 ierr = VecDestroy(&v);CHKERRQ(ierr); 5423 ierr = VecDestroy(&v2);CHKERRQ(ierr); 5424 ierr = VecScatterDestroy(&sct);CHKERRQ(ierr); 5425 PetscFunctionReturn(0); 5426 } 5427 5428 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5429 { 5430 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5431 PC_IS *pcis = (PC_IS*)pc->data; 5432 PC pc_temp; 5433 Mat A_RR; 5434 MatNullSpace nnsp; 5435 MatReuse reuse; 5436 PetscScalar m_one = -1.0; 5437 PetscReal value; 5438 PetscInt n_D,n_R; 5439 PetscBool issbaij,opts; 5440 PetscErrorCode ierr; 5441 void (*f)(void) = NULL; 5442 char dir_prefix[256],neu_prefix[256],str_level[16]; 5443 size_t len; 5444 5445 PetscFunctionBegin; 5446 ierr = PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5447 /* approximate solver, propagate NearNullSpace if needed */ 5448 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5449 MatNullSpace gnnsp1,gnnsp2; 5450 PetscBool lhas,ghas; 5451 5452 ierr = MatGetNearNullSpace(pcbddc->local_mat,&nnsp);CHKERRQ(ierr); 5453 ierr = MatGetNearNullSpace(pc->pmat,&gnnsp1);CHKERRQ(ierr); 5454 ierr = MatGetNullSpace(pc->pmat,&gnnsp2);CHKERRQ(ierr); 5455 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5456 ierr = MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5457 if (!ghas && (gnnsp1 || gnnsp2)) { 5458 ierr = MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);CHKERRQ(ierr); 5459 } 5460 } 5461 5462 /* compute prefixes */ 5463 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5464 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5465 if (!pcbddc->current_level) { 5466 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5467 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5468 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5469 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5470 } else { 5471 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5472 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5473 len -= 15; /* remove "pc_bddc_coarse_" */ 5474 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5475 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5476 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5477 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5478 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5479 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5480 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5481 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5482 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5483 } 5484 5485 /* DIRICHLET PROBLEM */ 5486 if (dirichlet) { 5487 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5488 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5489 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5490 if (pcbddc->dbg_flag) { 5491 Mat A_IIn; 5492 5493 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5494 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5495 pcis->A_II = A_IIn; 5496 } 5497 } 5498 if (pcbddc->local_mat->symmetric_set) { 5499 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5500 } 5501 /* Matrix for Dirichlet problem is pcis->A_II */ 5502 n_D = pcis->n - pcis->n_B; 5503 opts = PETSC_FALSE; 5504 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5505 opts = PETSC_TRUE; 5506 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5507 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5508 /* default */ 5509 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5510 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5511 ierr = PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5512 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5513 if (issbaij) { 5514 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5515 } else { 5516 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5517 } 5518 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr); 5519 } 5520 ierr = MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);CHKERRQ(ierr); 5521 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);CHKERRQ(ierr); 5522 /* Allow user's customization */ 5523 if (opts) { 5524 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5525 } 5526 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5527 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5528 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);CHKERRQ(ierr); 5529 } 5530 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5531 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5532 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5533 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5534 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5535 const PetscInt *idxs; 5536 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5537 5538 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5539 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5540 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5541 for (i=0;i<nl;i++) { 5542 for (d=0;d<cdim;d++) { 5543 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5544 } 5545 } 5546 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5547 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5548 ierr = PetscFree(scoords);CHKERRQ(ierr); 5549 } 5550 if (sub_schurs && sub_schurs->reuse_solver) { 5551 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5552 5553 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5554 } 5555 5556 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5557 if (!n_D) { 5558 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5559 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5560 } 5561 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5562 /* set ksp_D into pcis data */ 5563 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5564 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5565 pcis->ksp_D = pcbddc->ksp_D; 5566 } 5567 5568 /* NEUMANN PROBLEM */ 5569 A_RR = NULL; 5570 if (neumann) { 5571 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5572 PetscInt ibs,mbs; 5573 PetscBool issbaij, reuse_neumann_solver; 5574 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5575 5576 reuse_neumann_solver = PETSC_FALSE; 5577 if (sub_schurs && sub_schurs->reuse_solver) { 5578 IS iP; 5579 5580 reuse_neumann_solver = PETSC_TRUE; 5581 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5582 if (iP) reuse_neumann_solver = PETSC_FALSE; 5583 } 5584 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5585 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5586 if (pcbddc->ksp_R) { /* already created ksp */ 5587 PetscInt nn_R; 5588 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5589 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5590 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5591 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5592 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5593 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5594 reuse = MAT_INITIAL_MATRIX; 5595 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5596 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5597 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5598 reuse = MAT_INITIAL_MATRIX; 5599 } else { /* safe to reuse the matrix */ 5600 reuse = MAT_REUSE_MATRIX; 5601 } 5602 } 5603 /* last check */ 5604 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5605 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5606 reuse = MAT_INITIAL_MATRIX; 5607 } 5608 } else { /* first time, so we need to create the matrix */ 5609 reuse = MAT_INITIAL_MATRIX; 5610 } 5611 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5612 TODO: Get Rid of these conversions */ 5613 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5614 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5615 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5616 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5617 if (matis->A == pcbddc->local_mat) { 5618 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5619 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5620 } else { 5621 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5622 } 5623 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5624 if (matis->A == pcbddc->local_mat) { 5625 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5626 ierr = MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5627 } else { 5628 ierr = MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5629 } 5630 } 5631 /* extract A_RR */ 5632 if (reuse_neumann_solver) { 5633 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5634 5635 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5636 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5637 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5638 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5639 } else { 5640 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5641 } 5642 } else { 5643 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5644 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5645 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5646 } 5647 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5648 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5649 } 5650 if (pcbddc->local_mat->symmetric_set) { 5651 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5652 } 5653 opts = PETSC_FALSE; 5654 if (!pcbddc->ksp_R) { /* create object if not present */ 5655 opts = PETSC_TRUE; 5656 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5657 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5658 /* default */ 5659 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5660 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5661 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5662 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5663 if (issbaij) { 5664 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5665 } else { 5666 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5667 } 5668 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr); 5669 } 5670 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5671 ierr = MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);CHKERRQ(ierr); 5672 if (opts) { /* Allow user's customization once */ 5673 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5674 } 5675 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5676 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5677 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);CHKERRQ(ierr); 5678 } 5679 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5680 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5681 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5682 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5683 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5684 const PetscInt *idxs; 5685 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5686 5687 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5688 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5689 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5690 for (i=0;i<nl;i++) { 5691 for (d=0;d<cdim;d++) { 5692 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5693 } 5694 } 5695 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5696 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5697 ierr = PetscFree(scoords);CHKERRQ(ierr); 5698 } 5699 5700 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5701 if (!n_R) { 5702 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5703 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5704 } 5705 /* Reuse solver if it is present */ 5706 if (reuse_neumann_solver) { 5707 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5708 5709 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5710 } 5711 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5712 } 5713 5714 if (pcbddc->dbg_flag) { 5715 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5716 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5717 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5718 } 5719 ierr = PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5720 5721 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5722 if (pcbddc->NullSpace_corr[0]) { 5723 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5724 } 5725 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5726 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5727 } 5728 if (neumann && pcbddc->NullSpace_corr[2]) { 5729 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5730 } 5731 /* check Dirichlet and Neumann solvers */ 5732 if (pcbddc->dbg_flag) { 5733 if (dirichlet) { /* Dirichlet */ 5734 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5735 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5736 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5737 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 5738 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5739 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5740 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5741 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5742 } 5743 if (neumann) { /* Neumann */ 5744 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5745 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5746 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5747 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 5748 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5749 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5750 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5751 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5752 } 5753 } 5754 /* free Neumann problem's matrix */ 5755 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5756 PetscFunctionReturn(0); 5757 } 5758 5759 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5760 { 5761 PetscErrorCode ierr; 5762 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5763 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5764 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5765 5766 PetscFunctionBegin; 5767 if (!reuse_solver) { 5768 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5769 } 5770 if (!pcbddc->switch_static) { 5771 if (applytranspose && pcbddc->local_auxmat1) { 5772 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5773 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5774 } 5775 if (!reuse_solver) { 5776 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5777 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5778 } else { 5779 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5780 5781 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5782 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5783 } 5784 } else { 5785 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5786 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5787 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5788 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5789 if (applytranspose && pcbddc->local_auxmat1) { 5790 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5791 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5792 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5793 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5794 } 5795 } 5796 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);CHKERRQ(ierr); 5797 if (!reuse_solver || pcbddc->switch_static) { 5798 if (applytranspose) { 5799 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5800 } else { 5801 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5802 } 5803 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);CHKERRQ(ierr); 5804 } else { 5805 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5806 5807 if (applytranspose) { 5808 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5809 } else { 5810 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5811 } 5812 } 5813 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);CHKERRQ(ierr); 5814 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5815 if (!pcbddc->switch_static) { 5816 if (!reuse_solver) { 5817 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5818 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5819 } else { 5820 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5821 5822 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5823 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5824 } 5825 if (!applytranspose && pcbddc->local_auxmat1) { 5826 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5827 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5828 } 5829 } else { 5830 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5831 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5832 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5833 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5834 if (!applytranspose && pcbddc->local_auxmat1) { 5835 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5836 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5837 } 5838 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5839 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5840 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5841 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5842 } 5843 PetscFunctionReturn(0); 5844 } 5845 5846 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5847 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5848 { 5849 PetscErrorCode ierr; 5850 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5851 PC_IS* pcis = (PC_IS*) (pc->data); 5852 const PetscScalar zero = 0.0; 5853 5854 PetscFunctionBegin; 5855 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5856 if (!pcbddc->benign_apply_coarse_only) { 5857 if (applytranspose) { 5858 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5859 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5860 } else { 5861 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5862 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5863 } 5864 } else { 5865 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5866 } 5867 5868 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5869 if (pcbddc->benign_n) { 5870 PetscScalar *array; 5871 PetscInt j; 5872 5873 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5874 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5875 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5876 } 5877 5878 /* start communications from local primal nodes to rhs of coarse solver */ 5879 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5880 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5881 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5882 5883 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5884 if (pcbddc->coarse_ksp) { 5885 Mat coarse_mat; 5886 Vec rhs,sol; 5887 MatNullSpace nullsp; 5888 PetscBool isbddc = PETSC_FALSE; 5889 5890 if (pcbddc->benign_have_null) { 5891 PC coarse_pc; 5892 5893 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5894 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5895 /* we need to propagate to coarser levels the need for a possible benign correction */ 5896 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5897 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5898 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5899 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5900 } 5901 } 5902 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5903 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5904 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5905 if (applytranspose) { 5906 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5907 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5908 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5909 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5910 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5911 ierr = MatGetTransposeNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5912 if (nullsp) { 5913 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5914 } 5915 } else { 5916 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5917 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5918 PC coarse_pc; 5919 5920 if (nullsp) { 5921 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5922 } 5923 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5924 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5925 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5926 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5927 } else { 5928 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5929 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5930 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5931 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5932 if (nullsp) { 5933 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5934 } 5935 } 5936 } 5937 /* we don't need the benign correction at coarser levels anymore */ 5938 if (pcbddc->benign_have_null && isbddc) { 5939 PC coarse_pc; 5940 PC_BDDC* coarsepcbddc; 5941 5942 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5943 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5944 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5945 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5946 } 5947 } 5948 5949 /* Local solution on R nodes */ 5950 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5951 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5952 } 5953 /* communications from coarse sol to local primal nodes */ 5954 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5955 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5956 5957 /* Sum contributions from the two levels */ 5958 if (!pcbddc->benign_apply_coarse_only) { 5959 if (applytranspose) { 5960 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5961 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5962 } else { 5963 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5964 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5965 } 5966 /* store p0 */ 5967 if (pcbddc->benign_n) { 5968 PetscScalar *array; 5969 PetscInt j; 5970 5971 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5972 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5973 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5974 } 5975 } else { /* expand the coarse solution */ 5976 if (applytranspose) { 5977 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5978 } else { 5979 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5980 } 5981 } 5982 PetscFunctionReturn(0); 5983 } 5984 5985 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5986 { 5987 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5988 Vec from,to; 5989 const PetscScalar *array; 5990 PetscErrorCode ierr; 5991 5992 PetscFunctionBegin; 5993 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5994 from = pcbddc->coarse_vec; 5995 to = pcbddc->vec1_P; 5996 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5997 Vec tvec; 5998 5999 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 6000 ierr = VecResetArray(tvec);CHKERRQ(ierr); 6001 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 6002 ierr = VecGetArrayRead(tvec,&array);CHKERRQ(ierr); 6003 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 6004 ierr = VecRestoreArrayRead(tvec,&array);CHKERRQ(ierr); 6005 } 6006 } else { /* from local to global -> put data in coarse right hand side */ 6007 from = pcbddc->vec1_P; 6008 to = pcbddc->coarse_vec; 6009 } 6010 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 6011 PetscFunctionReturn(0); 6012 } 6013 6014 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6015 { 6016 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 6017 Vec from,to; 6018 const PetscScalar *array; 6019 PetscErrorCode ierr; 6020 6021 PetscFunctionBegin; 6022 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6023 from = pcbddc->coarse_vec; 6024 to = pcbddc->vec1_P; 6025 } else { /* from local to global -> put data in coarse right hand side */ 6026 from = pcbddc->vec1_P; 6027 to = pcbddc->coarse_vec; 6028 } 6029 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 6030 if (smode == SCATTER_FORWARD) { 6031 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6032 Vec tvec; 6033 6034 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 6035 ierr = VecGetArrayRead(to,&array);CHKERRQ(ierr); 6036 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 6037 ierr = VecRestoreArrayRead(to,&array);CHKERRQ(ierr); 6038 } 6039 } else { 6040 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6041 ierr = VecResetArray(from);CHKERRQ(ierr); 6042 } 6043 } 6044 PetscFunctionReturn(0); 6045 } 6046 6047 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6048 { 6049 PetscErrorCode ierr; 6050 PC_IS* pcis = (PC_IS*)(pc->data); 6051 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6052 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6053 /* one and zero */ 6054 PetscScalar one=1.0,zero=0.0; 6055 /* space to store constraints and their local indices */ 6056 PetscScalar *constraints_data; 6057 PetscInt *constraints_idxs,*constraints_idxs_B; 6058 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6059 PetscInt *constraints_n; 6060 /* iterators */ 6061 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6062 /* BLAS integers */ 6063 PetscBLASInt lwork,lierr; 6064 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6065 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6066 /* reuse */ 6067 PetscInt olocal_primal_size,olocal_primal_size_cc; 6068 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6069 /* change of basis */ 6070 PetscBool qr_needed; 6071 PetscBT change_basis,qr_needed_idx; 6072 /* auxiliary stuff */ 6073 PetscInt *nnz,*is_indices; 6074 PetscInt ncc; 6075 /* some quantities */ 6076 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6077 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6078 PetscReal tol; /* tolerance for retaining eigenmodes */ 6079 6080 PetscFunctionBegin; 6081 tol = PetscSqrtReal(PETSC_SMALL); 6082 /* Destroy Mat objects computed previously */ 6083 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6084 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6085 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 6086 /* save info on constraints from previous setup (if any) */ 6087 olocal_primal_size = pcbddc->local_primal_size; 6088 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6089 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 6090 ierr = PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);CHKERRQ(ierr); 6091 ierr = PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);CHKERRQ(ierr); 6092 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6093 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6094 6095 if (!pcbddc->adaptive_selection) { 6096 IS ISForVertices,*ISForFaces,*ISForEdges; 6097 MatNullSpace nearnullsp; 6098 const Vec *nearnullvecs; 6099 Vec *localnearnullsp; 6100 PetscScalar *array; 6101 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 6102 PetscBool nnsp_has_cnst; 6103 /* LAPACK working arrays for SVD or POD */ 6104 PetscBool skip_lapack,boolforchange; 6105 PetscScalar *work; 6106 PetscReal *singular_vals; 6107 #if defined(PETSC_USE_COMPLEX) 6108 PetscReal *rwork; 6109 #endif 6110 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6111 PetscBLASInt dummy_int=1; 6112 PetscScalar dummy_scalar=1.; 6113 PetscBool use_pod = PETSC_FALSE; 6114 6115 /* MKL SVD with same input gives different results on different processes! */ 6116 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL) 6117 use_pod = PETSC_TRUE; 6118 #endif 6119 /* Get index sets for faces, edges and vertices from graph */ 6120 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 6121 /* print some info */ 6122 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6123 PetscInt nv; 6124 6125 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6126 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 6127 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6128 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6129 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6130 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 6131 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 6132 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6133 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6134 } 6135 6136 /* free unneeded index sets */ 6137 if (!pcbddc->use_vertices) { 6138 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6139 } 6140 if (!pcbddc->use_edges) { 6141 for (i=0;i<n_ISForEdges;i++) { 6142 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6143 } 6144 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6145 n_ISForEdges = 0; 6146 } 6147 if (!pcbddc->use_faces) { 6148 for (i=0;i<n_ISForFaces;i++) { 6149 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6150 } 6151 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6152 n_ISForFaces = 0; 6153 } 6154 6155 /* check if near null space is attached to global mat */ 6156 if (pcbddc->use_nnsp) { 6157 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 6158 } else nearnullsp = NULL; 6159 6160 if (nearnullsp) { 6161 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 6162 /* remove any stored info */ 6163 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 6164 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6165 /* store information for BDDC solver reuse */ 6166 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 6167 pcbddc->onearnullspace = nearnullsp; 6168 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6169 for (i=0;i<nnsp_size;i++) { 6170 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 6171 } 6172 } else { /* if near null space is not provided BDDC uses constants by default */ 6173 nnsp_size = 0; 6174 nnsp_has_cnst = PETSC_TRUE; 6175 } 6176 /* get max number of constraints on a single cc */ 6177 max_constraints = nnsp_size; 6178 if (nnsp_has_cnst) max_constraints++; 6179 6180 /* 6181 Evaluate maximum storage size needed by the procedure 6182 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6183 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6184 There can be multiple constraints per connected component 6185 */ 6186 n_vertices = 0; 6187 if (ISForVertices) { 6188 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 6189 } 6190 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6191 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 6192 6193 total_counts = n_ISForFaces+n_ISForEdges; 6194 total_counts *= max_constraints; 6195 total_counts += n_vertices; 6196 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 6197 6198 total_counts = 0; 6199 max_size_of_constraint = 0; 6200 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6201 IS used_is; 6202 if (i<n_ISForEdges) { 6203 used_is = ISForEdges[i]; 6204 } else { 6205 used_is = ISForFaces[i-n_ISForEdges]; 6206 } 6207 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 6208 total_counts += j; 6209 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6210 } 6211 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 6212 6213 /* get local part of global near null space vectors */ 6214 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 6215 for (k=0;k<nnsp_size;k++) { 6216 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 6217 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6218 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6219 } 6220 6221 /* whether or not to skip lapack calls */ 6222 skip_lapack = PETSC_TRUE; 6223 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6224 6225 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6226 if (!skip_lapack) { 6227 PetscScalar temp_work; 6228 6229 if (use_pod) { 6230 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6231 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 6232 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 6233 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 6234 #if defined(PETSC_USE_COMPLEX) 6235 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 6236 #endif 6237 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6238 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6239 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 6240 lwork = -1; 6241 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6242 #if !defined(PETSC_USE_COMPLEX) 6243 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6244 #else 6245 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6246 #endif 6247 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6248 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6249 } else { 6250 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6251 /* SVD */ 6252 PetscInt max_n,min_n; 6253 max_n = max_size_of_constraint; 6254 min_n = max_constraints; 6255 if (max_size_of_constraint < max_constraints) { 6256 min_n = max_size_of_constraint; 6257 max_n = max_constraints; 6258 } 6259 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 6260 #if defined(PETSC_USE_COMPLEX) 6261 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 6262 #endif 6263 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6264 lwork = -1; 6265 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 6266 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 6267 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 6268 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6269 #if !defined(PETSC_USE_COMPLEX) 6270 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6271 #else 6272 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6273 #endif 6274 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6275 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6276 #else 6277 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6278 #endif /* on missing GESVD */ 6279 } 6280 /* Allocate optimal workspace */ 6281 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 6282 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 6283 } 6284 /* Now we can loop on constraining sets */ 6285 total_counts = 0; 6286 constraints_idxs_ptr[0] = 0; 6287 constraints_data_ptr[0] = 0; 6288 /* vertices */ 6289 if (n_vertices) { 6290 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6291 ierr = PetscArraycpy(constraints_idxs,is_indices,n_vertices);CHKERRQ(ierr); 6292 for (i=0;i<n_vertices;i++) { 6293 constraints_n[total_counts] = 1; 6294 constraints_data[total_counts] = 1.0; 6295 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6296 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6297 total_counts++; 6298 } 6299 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6300 n_vertices = total_counts; 6301 } 6302 6303 /* edges and faces */ 6304 total_counts_cc = total_counts; 6305 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6306 IS used_is; 6307 PetscBool idxs_copied = PETSC_FALSE; 6308 6309 if (ncc<n_ISForEdges) { 6310 used_is = ISForEdges[ncc]; 6311 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6312 } else { 6313 used_is = ISForFaces[ncc-n_ISForEdges]; 6314 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6315 } 6316 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6317 6318 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6319 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6320 /* change of basis should not be performed on local periodic nodes */ 6321 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6322 if (nnsp_has_cnst) { 6323 PetscScalar quad_value; 6324 6325 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6326 idxs_copied = PETSC_TRUE; 6327 6328 if (!pcbddc->use_nnsp_true) { 6329 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6330 } else { 6331 quad_value = 1.0; 6332 } 6333 for (j=0;j<size_of_constraint;j++) { 6334 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6335 } 6336 temp_constraints++; 6337 total_counts++; 6338 } 6339 for (k=0;k<nnsp_size;k++) { 6340 PetscReal real_value; 6341 PetscScalar *ptr_to_data; 6342 6343 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6344 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6345 for (j=0;j<size_of_constraint;j++) { 6346 ptr_to_data[j] = array[is_indices[j]]; 6347 } 6348 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6349 /* check if array is null on the connected component */ 6350 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6351 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6352 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6353 temp_constraints++; 6354 total_counts++; 6355 if (!idxs_copied) { 6356 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6357 idxs_copied = PETSC_TRUE; 6358 } 6359 } 6360 } 6361 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6362 valid_constraints = temp_constraints; 6363 if (!pcbddc->use_nnsp_true && temp_constraints) { 6364 if (temp_constraints == 1) { /* just normalize the constraint */ 6365 PetscScalar norm,*ptr_to_data; 6366 6367 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6368 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6369 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6370 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6371 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6372 } else { /* perform SVD */ 6373 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6374 6375 if (use_pod) { 6376 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6377 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6378 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6379 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6380 from that computed using LAPACKgesvd 6381 -> This is due to a different computation of eigenvectors in LAPACKheev 6382 -> The quality of the POD-computed basis will be the same */ 6383 ierr = PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);CHKERRQ(ierr); 6384 /* Store upper triangular part of correlation matrix */ 6385 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6386 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6387 for (j=0;j<temp_constraints;j++) { 6388 for (k=0;k<j+1;k++) { 6389 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6390 } 6391 } 6392 /* compute eigenvalues and eigenvectors of correlation matrix */ 6393 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6394 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6395 #if !defined(PETSC_USE_COMPLEX) 6396 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6397 #else 6398 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6399 #endif 6400 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6401 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6402 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6403 j = 0; 6404 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6405 total_counts = total_counts-j; 6406 valid_constraints = temp_constraints-j; 6407 /* scale and copy POD basis into used quadrature memory */ 6408 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6409 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6410 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6411 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6412 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6413 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6414 if (j<temp_constraints) { 6415 PetscInt ii; 6416 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6417 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6418 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6419 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6420 for (k=0;k<temp_constraints-j;k++) { 6421 for (ii=0;ii<size_of_constraint;ii++) { 6422 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6423 } 6424 } 6425 } 6426 } else { 6427 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6428 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6429 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6430 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6431 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6432 #if !defined(PETSC_USE_COMPLEX) 6433 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6434 #else 6435 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6436 #endif 6437 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6438 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6439 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6440 k = temp_constraints; 6441 if (k > size_of_constraint) k = size_of_constraint; 6442 j = 0; 6443 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6444 valid_constraints = k-j; 6445 total_counts = total_counts-temp_constraints+valid_constraints; 6446 #else 6447 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6448 #endif /* on missing GESVD */ 6449 } 6450 } 6451 } 6452 /* update pointers information */ 6453 if (valid_constraints) { 6454 constraints_n[total_counts_cc] = valid_constraints; 6455 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6456 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6457 /* set change_of_basis flag */ 6458 if (boolforchange) { 6459 PetscBTSet(change_basis,total_counts_cc); 6460 } 6461 total_counts_cc++; 6462 } 6463 } 6464 /* free workspace */ 6465 if (!skip_lapack) { 6466 ierr = PetscFree(work);CHKERRQ(ierr); 6467 #if defined(PETSC_USE_COMPLEX) 6468 ierr = PetscFree(rwork);CHKERRQ(ierr); 6469 #endif 6470 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6471 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6472 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6473 } 6474 for (k=0;k<nnsp_size;k++) { 6475 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6476 } 6477 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6478 /* free index sets of faces, edges and vertices */ 6479 for (i=0;i<n_ISForFaces;i++) { 6480 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6481 } 6482 if (n_ISForFaces) { 6483 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6484 } 6485 for (i=0;i<n_ISForEdges;i++) { 6486 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6487 } 6488 if (n_ISForEdges) { 6489 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6490 } 6491 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6492 } else { 6493 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6494 6495 total_counts = 0; 6496 n_vertices = 0; 6497 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6498 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6499 } 6500 max_constraints = 0; 6501 total_counts_cc = 0; 6502 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6503 total_counts += pcbddc->adaptive_constraints_n[i]; 6504 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6505 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6506 } 6507 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6508 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6509 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6510 constraints_data = pcbddc->adaptive_constraints_data; 6511 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6512 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6513 total_counts_cc = 0; 6514 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6515 if (pcbddc->adaptive_constraints_n[i]) { 6516 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6517 } 6518 } 6519 6520 max_size_of_constraint = 0; 6521 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6522 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6523 /* Change of basis */ 6524 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6525 if (pcbddc->use_change_of_basis) { 6526 for (i=0;i<sub_schurs->n_subs;i++) { 6527 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6528 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6529 } 6530 } 6531 } 6532 } 6533 pcbddc->local_primal_size = total_counts; 6534 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6535 6536 /* map constraints_idxs in boundary numbering */ 6537 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6538 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6539 6540 /* Create constraint matrix */ 6541 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6542 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6543 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6544 6545 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6546 /* determine if a QR strategy is needed for change of basis */ 6547 qr_needed = pcbddc->use_qr_single; 6548 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6549 total_primal_vertices=0; 6550 pcbddc->local_primal_size_cc = 0; 6551 for (i=0;i<total_counts_cc;i++) { 6552 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6553 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6554 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6555 pcbddc->local_primal_size_cc += 1; 6556 } else if (PetscBTLookup(change_basis,i)) { 6557 for (k=0;k<constraints_n[i];k++) { 6558 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6559 } 6560 pcbddc->local_primal_size_cc += constraints_n[i]; 6561 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6562 PetscBTSet(qr_needed_idx,i); 6563 qr_needed = PETSC_TRUE; 6564 } 6565 } else { 6566 pcbddc->local_primal_size_cc += 1; 6567 } 6568 } 6569 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6570 pcbddc->n_vertices = total_primal_vertices; 6571 /* permute indices in order to have a sorted set of vertices */ 6572 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6573 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6574 ierr = PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);CHKERRQ(ierr); 6575 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6576 6577 /* nonzero structure of constraint matrix */ 6578 /* and get reference dof for local constraints */ 6579 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6580 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6581 6582 j = total_primal_vertices; 6583 total_counts = total_primal_vertices; 6584 cum = total_primal_vertices; 6585 for (i=n_vertices;i<total_counts_cc;i++) { 6586 if (!PetscBTLookup(change_basis,i)) { 6587 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6588 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6589 cum++; 6590 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6591 for (k=0;k<constraints_n[i];k++) { 6592 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6593 nnz[j+k] = size_of_constraint; 6594 } 6595 j += constraints_n[i]; 6596 } 6597 } 6598 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6599 ierr = MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6600 ierr = PetscFree(nnz);CHKERRQ(ierr); 6601 6602 /* set values in constraint matrix */ 6603 for (i=0;i<total_primal_vertices;i++) { 6604 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6605 } 6606 total_counts = total_primal_vertices; 6607 for (i=n_vertices;i<total_counts_cc;i++) { 6608 if (!PetscBTLookup(change_basis,i)) { 6609 PetscInt *cols; 6610 6611 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6612 cols = constraints_idxs+constraints_idxs_ptr[i]; 6613 for (k=0;k<constraints_n[i];k++) { 6614 PetscInt row = total_counts+k; 6615 PetscScalar *vals; 6616 6617 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6618 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6619 } 6620 total_counts += constraints_n[i]; 6621 } 6622 } 6623 /* assembling */ 6624 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6625 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6626 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6627 6628 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6629 if (pcbddc->use_change_of_basis) { 6630 /* dual and primal dofs on a single cc */ 6631 PetscInt dual_dofs,primal_dofs; 6632 /* working stuff for GEQRF */ 6633 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6634 PetscBLASInt lqr_work; 6635 /* working stuff for UNGQR */ 6636 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6637 PetscBLASInt lgqr_work; 6638 /* working stuff for TRTRS */ 6639 PetscScalar *trs_rhs = NULL; 6640 PetscBLASInt Blas_NRHS; 6641 /* pointers for values insertion into change of basis matrix */ 6642 PetscInt *start_rows,*start_cols; 6643 PetscScalar *start_vals; 6644 /* working stuff for values insertion */ 6645 PetscBT is_primal; 6646 PetscInt *aux_primal_numbering_B; 6647 /* matrix sizes */ 6648 PetscInt global_size,local_size; 6649 /* temporary change of basis */ 6650 Mat localChangeOfBasisMatrix; 6651 /* extra space for debugging */ 6652 PetscScalar *dbg_work = NULL; 6653 6654 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6655 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6656 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6657 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6658 /* nonzeros for local mat */ 6659 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6660 if (!pcbddc->benign_change || pcbddc->fake_change) { 6661 for (i=0;i<pcis->n;i++) nnz[i]=1; 6662 } else { 6663 const PetscInt *ii; 6664 PetscInt n; 6665 PetscBool flg_row; 6666 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6667 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6668 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6669 } 6670 for (i=n_vertices;i<total_counts_cc;i++) { 6671 if (PetscBTLookup(change_basis,i)) { 6672 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6673 if (PetscBTLookup(qr_needed_idx,i)) { 6674 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6675 } else { 6676 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6677 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6678 } 6679 } 6680 } 6681 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6682 ierr = MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6683 ierr = PetscFree(nnz);CHKERRQ(ierr); 6684 /* Set interior change in the matrix */ 6685 if (!pcbddc->benign_change || pcbddc->fake_change) { 6686 for (i=0;i<pcis->n;i++) { 6687 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6688 } 6689 } else { 6690 const PetscInt *ii,*jj; 6691 PetscScalar *aa; 6692 PetscInt n; 6693 PetscBool flg_row; 6694 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6695 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6696 for (i=0;i<n;i++) { 6697 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6698 } 6699 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6700 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6701 } 6702 6703 if (pcbddc->dbg_flag) { 6704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6706 } 6707 6708 6709 /* Now we loop on the constraints which need a change of basis */ 6710 /* 6711 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6712 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6713 6714 Basic blocks of change of basis matrix T computed by 6715 6716 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6717 6718 | 1 0 ... 0 s_1/S | 6719 | 0 1 ... 0 s_2/S | 6720 | ... | 6721 | 0 ... 1 s_{n-1}/S | 6722 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6723 6724 with S = \sum_{i=1}^n s_i^2 6725 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6726 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6727 6728 - QR decomposition of constraints otherwise 6729 */ 6730 if (qr_needed && max_size_of_constraint) { 6731 /* space to store Q */ 6732 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6733 /* array to store scaling factors for reflectors */ 6734 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6735 /* first we issue queries for optimal work */ 6736 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6737 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6738 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6739 lqr_work = -1; 6740 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6741 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6742 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6743 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6744 lgqr_work = -1; 6745 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6746 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6747 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6748 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6749 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6750 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6751 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6752 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6753 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6754 /* array to store rhs and solution of triangular solver */ 6755 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6756 /* allocating workspace for check */ 6757 if (pcbddc->dbg_flag) { 6758 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6759 } 6760 } 6761 /* array to store whether a node is primal or not */ 6762 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6763 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6764 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6765 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6766 for (i=0;i<total_primal_vertices;i++) { 6767 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6768 } 6769 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6770 6771 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6772 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6773 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6774 if (PetscBTLookup(change_basis,total_counts)) { 6775 /* get constraint info */ 6776 primal_dofs = constraints_n[total_counts]; 6777 dual_dofs = size_of_constraint-primal_dofs; 6778 6779 if (pcbddc->dbg_flag) { 6780 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6781 } 6782 6783 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6784 6785 /* copy quadrature constraints for change of basis check */ 6786 if (pcbddc->dbg_flag) { 6787 ierr = PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6788 } 6789 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6790 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6791 6792 /* compute QR decomposition of constraints */ 6793 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6794 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6795 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6796 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6797 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6798 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6799 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6800 6801 /* explictly compute R^-T */ 6802 ierr = PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);CHKERRQ(ierr); 6803 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6804 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6805 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6806 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6807 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6808 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6809 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6810 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6811 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6812 6813 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6814 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6815 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6816 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6817 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6818 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6819 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6820 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6821 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6822 6823 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6824 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6825 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6826 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6827 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6828 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6829 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6830 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6831 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6832 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6833 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6834 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6835 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6836 6837 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6838 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6839 /* insert cols for primal dofs */ 6840 for (j=0;j<primal_dofs;j++) { 6841 start_vals = &qr_basis[j*size_of_constraint]; 6842 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6843 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6844 } 6845 /* insert cols for dual dofs */ 6846 for (j=0,k=0;j<dual_dofs;k++) { 6847 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6848 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6849 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6850 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6851 j++; 6852 } 6853 } 6854 6855 /* check change of basis */ 6856 if (pcbddc->dbg_flag) { 6857 PetscInt ii,jj; 6858 PetscBool valid_qr=PETSC_TRUE; 6859 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6860 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6861 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6862 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6863 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6864 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6865 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6866 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6867 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6868 for (jj=0;jj<size_of_constraint;jj++) { 6869 for (ii=0;ii<primal_dofs;ii++) { 6870 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6871 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6872 } 6873 } 6874 if (!valid_qr) { 6875 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6876 for (jj=0;jj<size_of_constraint;jj++) { 6877 for (ii=0;ii<primal_dofs;ii++) { 6878 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6880 } 6881 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6882 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6883 } 6884 } 6885 } 6886 } else { 6887 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6888 } 6889 } 6890 } else { /* simple transformation block */ 6891 PetscInt row,col; 6892 PetscScalar val,norm; 6893 6894 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6895 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6896 for (j=0;j<size_of_constraint;j++) { 6897 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6898 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6899 if (!PetscBTLookup(is_primal,row_B)) { 6900 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6901 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6902 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6903 } else { 6904 for (k=0;k<size_of_constraint;k++) { 6905 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6906 if (row != col) { 6907 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6908 } else { 6909 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6910 } 6911 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6912 } 6913 } 6914 } 6915 if (pcbddc->dbg_flag) { 6916 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6917 } 6918 } 6919 } else { 6920 if (pcbddc->dbg_flag) { 6921 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6922 } 6923 } 6924 } 6925 6926 /* free workspace */ 6927 if (qr_needed) { 6928 if (pcbddc->dbg_flag) { 6929 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6930 } 6931 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6932 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6933 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6934 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6935 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6936 } 6937 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6938 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6939 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6940 6941 /* assembling of global change of variable */ 6942 if (!pcbddc->fake_change) { 6943 Mat tmat; 6944 PetscInt bs; 6945 6946 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6947 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6948 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6949 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6950 ierr = MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6951 ierr = MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6952 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6953 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6954 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6955 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6956 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6957 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6958 ierr = MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6959 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6960 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6961 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6962 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6963 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6964 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6965 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6966 6967 /* check */ 6968 if (pcbddc->dbg_flag) { 6969 PetscReal error; 6970 Vec x,x_change; 6971 6972 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6973 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6974 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6975 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6976 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6977 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6978 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6979 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6980 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6981 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6982 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6983 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6984 if (error > PETSC_SMALL) { 6985 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6986 } 6987 ierr = VecDestroy(&x);CHKERRQ(ierr); 6988 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6989 } 6990 /* adapt sub_schurs computed (if any) */ 6991 if (pcbddc->use_deluxe_scaling) { 6992 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6993 6994 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6995 if (sub_schurs && sub_schurs->S_Ej_all) { 6996 Mat S_new,tmat; 6997 IS is_all_N,is_V_Sall = NULL; 6998 6999 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 7000 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 7001 if (pcbddc->deluxe_zerorows) { 7002 ISLocalToGlobalMapping NtoSall; 7003 IS is_V; 7004 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 7005 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 7006 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 7007 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 7008 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 7009 } 7010 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 7011 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7012 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 7013 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7014 if (pcbddc->deluxe_zerorows) { 7015 const PetscScalar *array; 7016 const PetscInt *idxs_V,*idxs_all; 7017 PetscInt i,n_V; 7018 7019 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7020 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 7021 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7022 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7023 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 7024 for (i=0;i<n_V;i++) { 7025 PetscScalar val; 7026 PetscInt idx; 7027 7028 idx = idxs_V[i]; 7029 val = array[idxs_all[idxs_V[i]]]; 7030 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 7031 } 7032 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7033 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7034 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 7035 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7036 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7037 } 7038 sub_schurs->S_Ej_all = S_new; 7039 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7040 if (sub_schurs->sum_S_Ej_all) { 7041 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7042 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 7043 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7044 if (pcbddc->deluxe_zerorows) { 7045 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7046 } 7047 sub_schurs->sum_S_Ej_all = S_new; 7048 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7049 } 7050 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 7051 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 7052 } 7053 /* destroy any change of basis context in sub_schurs */ 7054 if (sub_schurs && sub_schurs->change) { 7055 PetscInt i; 7056 7057 for (i=0;i<sub_schurs->n_subs;i++) { 7058 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 7059 } 7060 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 7061 } 7062 } 7063 if (pcbddc->switch_static) { /* need to save the local change */ 7064 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7065 } else { 7066 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 7067 } 7068 /* determine if any process has changed the pressures locally */ 7069 pcbddc->change_interior = pcbddc->benign_have_null; 7070 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7071 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 7072 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7073 pcbddc->use_qr_single = qr_needed; 7074 } 7075 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7076 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7077 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 7078 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7079 } else { 7080 Mat benign_global = NULL; 7081 if (pcbddc->benign_have_null) { 7082 Mat M; 7083 7084 pcbddc->change_interior = PETSC_TRUE; 7085 ierr = VecCopy(matis->counter,pcis->vec1_N);CHKERRQ(ierr); 7086 ierr = VecReciprocal(pcis->vec1_N);CHKERRQ(ierr); 7087 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);CHKERRQ(ierr); 7088 if (pcbddc->benign_change) { 7089 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 7090 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 7091 } else { 7092 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);CHKERRQ(ierr); 7093 ierr = MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);CHKERRQ(ierr); 7094 } 7095 ierr = MatISSetLocalMat(benign_global,M);CHKERRQ(ierr); 7096 ierr = MatDestroy(&M);CHKERRQ(ierr); 7097 ierr = MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7098 ierr = MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7099 } 7100 if (pcbddc->user_ChangeOfBasisMatrix) { 7101 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 7102 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 7103 } else if (pcbddc->benign_have_null) { 7104 pcbddc->ChangeOfBasisMatrix = benign_global; 7105 } 7106 } 7107 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7108 IS is_global; 7109 const PetscInt *gidxs; 7110 7111 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7112 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 7113 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7114 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 7115 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 7116 } 7117 } 7118 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7119 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 7120 } 7121 7122 if (!pcbddc->fake_change) { 7123 /* add pressure dofs to set of primal nodes for numbering purposes */ 7124 for (i=0;i<pcbddc->benign_n;i++) { 7125 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7126 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7127 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7128 pcbddc->local_primal_size_cc++; 7129 pcbddc->local_primal_size++; 7130 } 7131 7132 /* check if a new primal space has been introduced (also take into account benign trick) */ 7133 pcbddc->new_primal_space_local = PETSC_TRUE; 7134 if (olocal_primal_size == pcbddc->local_primal_size) { 7135 ierr = PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7136 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7137 if (!pcbddc->new_primal_space_local) { 7138 ierr = PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7139 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7140 } 7141 } 7142 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7143 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7144 } 7145 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 7146 7147 /* flush dbg viewer */ 7148 if (pcbddc->dbg_flag) { 7149 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7150 } 7151 7152 /* free workspace */ 7153 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 7154 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 7155 if (!pcbddc->adaptive_selection) { 7156 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 7157 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 7158 } else { 7159 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 7160 pcbddc->adaptive_constraints_idxs_ptr, 7161 pcbddc->adaptive_constraints_data_ptr, 7162 pcbddc->adaptive_constraints_idxs, 7163 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 7164 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 7165 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 7166 } 7167 PetscFunctionReturn(0); 7168 } 7169 7170 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7171 { 7172 ISLocalToGlobalMapping map; 7173 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7174 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7175 PetscInt i,N; 7176 PetscBool rcsr = PETSC_FALSE; 7177 PetscErrorCode ierr; 7178 7179 PetscFunctionBegin; 7180 if (pcbddc->recompute_topography) { 7181 pcbddc->graphanalyzed = PETSC_FALSE; 7182 /* Reset previously computed graph */ 7183 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 7184 /* Init local Graph struct */ 7185 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 7186 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 7187 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 7188 7189 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7190 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7191 } 7192 /* Check validity of the csr graph passed in by the user */ 7193 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7194 7195 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7196 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7197 PetscInt *xadj,*adjncy; 7198 PetscInt nvtxs; 7199 PetscBool flg_row=PETSC_FALSE; 7200 7201 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7202 if (flg_row) { 7203 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 7204 pcbddc->computed_rowadj = PETSC_TRUE; 7205 } 7206 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7207 rcsr = PETSC_TRUE; 7208 } 7209 if (pcbddc->dbg_flag) { 7210 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7211 } 7212 7213 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7214 PetscReal *lcoords; 7215 PetscInt n; 7216 MPI_Datatype dimrealtype; 7217 7218 /* TODO: support for blocked */ 7219 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7220 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7221 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 7222 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRMPI(ierr); 7223 ierr = MPI_Type_commit(&dimrealtype);CHKERRMPI(ierr); 7224 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 7225 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 7226 ierr = MPI_Type_free(&dimrealtype);CHKERRMPI(ierr); 7227 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 7228 7229 pcbddc->mat_graph->coords = lcoords; 7230 pcbddc->mat_graph->cloc = PETSC_TRUE; 7231 pcbddc->mat_graph->cnloc = n; 7232 } 7233 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7234 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 7235 7236 /* Setup of Graph */ 7237 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7238 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7239 7240 /* attach info on disconnected subdomains if present */ 7241 if (pcbddc->n_local_subs) { 7242 PetscInt *local_subs,n,totn; 7243 7244 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7245 ierr = PetscMalloc1(n,&local_subs);CHKERRQ(ierr); 7246 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7247 for (i=0;i<pcbddc->n_local_subs;i++) { 7248 const PetscInt *idxs; 7249 PetscInt nl,j; 7250 7251 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 7252 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7253 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7254 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7255 } 7256 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7257 pcbddc->mat_graph->n_local_subs = totn + 1; 7258 pcbddc->mat_graph->local_subs = local_subs; 7259 } 7260 } 7261 7262 if (!pcbddc->graphanalyzed) { 7263 /* Graph's connected components analysis */ 7264 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 7265 pcbddc->graphanalyzed = PETSC_TRUE; 7266 pcbddc->corner_selected = pcbddc->corner_selection; 7267 } 7268 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7269 PetscFunctionReturn(0); 7270 } 7271 7272 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7273 { 7274 PetscInt i,j,n; 7275 PetscScalar *alphas; 7276 PetscReal norm,*onorms; 7277 PetscErrorCode ierr; 7278 7279 PetscFunctionBegin; 7280 n = *nio; 7281 if (!n) PetscFunctionReturn(0); 7282 ierr = PetscMalloc2(n,&alphas,n,&onorms);CHKERRQ(ierr); 7283 ierr = VecNormalize(vecs[0],&norm);CHKERRQ(ierr); 7284 if (norm < PETSC_SMALL) { 7285 onorms[0] = 0.0; 7286 ierr = VecSet(vecs[0],0.0);CHKERRQ(ierr); 7287 } else { 7288 onorms[0] = norm; 7289 } 7290 7291 for (i=1;i<n;i++) { 7292 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 7293 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7294 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 7295 ierr = VecNormalize(vecs[i],&norm);CHKERRQ(ierr); 7296 if (norm < PETSC_SMALL) { 7297 onorms[i] = 0.0; 7298 ierr = VecSet(vecs[i],0.0);CHKERRQ(ierr); 7299 } else { 7300 onorms[i] = norm; 7301 } 7302 } 7303 /* push nonzero vectors at the beginning */ 7304 for (i=0;i<n;i++) { 7305 if (onorms[i] == 0.0) { 7306 for (j=i+1;j<n;j++) { 7307 if (onorms[j] != 0.0) { 7308 ierr = VecCopy(vecs[j],vecs[i]);CHKERRQ(ierr); 7309 onorms[j] = 0.0; 7310 } 7311 } 7312 } 7313 } 7314 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7315 ierr = PetscFree2(alphas,onorms);CHKERRQ(ierr); 7316 PetscFunctionReturn(0); 7317 } 7318 7319 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7320 { 7321 Mat A; 7322 PetscInt n_neighs,*neighs,*n_shared,**shared; 7323 PetscMPIInt size,rank,color; 7324 PetscInt *xadj,*adjncy; 7325 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7326 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7327 PetscInt void_procs,*procs_candidates = NULL; 7328 PetscInt xadj_count,*count; 7329 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7330 PetscSubcomm psubcomm; 7331 MPI_Comm subcomm; 7332 PetscErrorCode ierr; 7333 7334 PetscFunctionBegin; 7335 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7336 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7337 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7338 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7339 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7340 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7341 7342 if (have_void) *have_void = PETSC_FALSE; 7343 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRMPI(ierr); 7344 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 7345 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7346 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7347 im_active = !!n; 7348 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7349 void_procs = size - active_procs; 7350 /* get ranks of of non-active processes in mat communicator */ 7351 if (void_procs) { 7352 PetscInt ncand; 7353 7354 if (have_void) *have_void = PETSC_TRUE; 7355 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7356 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 7357 for (i=0,ncand=0;i<size;i++) { 7358 if (!procs_candidates[i]) { 7359 procs_candidates[ncand++] = i; 7360 } 7361 } 7362 /* force n_subdomains to be not greater that the number of non-active processes */ 7363 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7364 } 7365 7366 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7367 number of subdomains requested 1 -> send to master or first candidate in voids */ 7368 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7369 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7370 PetscInt issize,isidx,dest; 7371 if (*n_subdomains == 1) dest = 0; 7372 else dest = rank; 7373 if (im_active) { 7374 issize = 1; 7375 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7376 isidx = procs_candidates[dest]; 7377 } else { 7378 isidx = dest; 7379 } 7380 } else { 7381 issize = 0; 7382 isidx = -1; 7383 } 7384 if (*n_subdomains != 1) *n_subdomains = active_procs; 7385 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7386 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7387 PetscFunctionReturn(0); 7388 } 7389 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7390 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7391 threshold = PetscMax(threshold,2); 7392 7393 /* Get info on mapping */ 7394 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7395 7396 /* build local CSR graph of subdomains' connectivity */ 7397 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7398 xadj[0] = 0; 7399 xadj[1] = PetscMax(n_neighs-1,0); 7400 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7401 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7402 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7403 for (i=1;i<n_neighs;i++) 7404 for (j=0;j<n_shared[i];j++) 7405 count[shared[i][j]] += 1; 7406 7407 xadj_count = 0; 7408 for (i=1;i<n_neighs;i++) { 7409 for (j=0;j<n_shared[i];j++) { 7410 if (count[shared[i][j]] < threshold) { 7411 adjncy[xadj_count] = neighs[i]; 7412 adjncy_wgt[xadj_count] = n_shared[i]; 7413 xadj_count++; 7414 break; 7415 } 7416 } 7417 } 7418 xadj[1] = xadj_count; 7419 ierr = PetscFree(count);CHKERRQ(ierr); 7420 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7421 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7422 7423 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7424 7425 /* Restrict work on active processes only */ 7426 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7427 if (void_procs) { 7428 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7429 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7430 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7431 subcomm = PetscSubcommChild(psubcomm); 7432 } else { 7433 psubcomm = NULL; 7434 subcomm = PetscObjectComm((PetscObject)mat); 7435 } 7436 7437 v_wgt = NULL; 7438 if (!color) { 7439 ierr = PetscFree(xadj);CHKERRQ(ierr); 7440 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7441 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7442 } else { 7443 Mat subdomain_adj; 7444 IS new_ranks,new_ranks_contig; 7445 MatPartitioning partitioner; 7446 PetscInt rstart=0,rend=0; 7447 PetscInt *is_indices,*oldranks; 7448 PetscMPIInt size; 7449 PetscBool aggregate; 7450 7451 ierr = MPI_Comm_size(subcomm,&size);CHKERRMPI(ierr); 7452 if (void_procs) { 7453 PetscInt prank = rank; 7454 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7455 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRMPI(ierr); 7456 for (i=0;i<xadj[1];i++) { 7457 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7458 } 7459 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7460 } else { 7461 oldranks = NULL; 7462 } 7463 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7464 if (aggregate) { /* TODO: all this part could be made more efficient */ 7465 PetscInt lrows,row,ncols,*cols; 7466 PetscMPIInt nrank; 7467 PetscScalar *vals; 7468 7469 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRMPI(ierr); 7470 lrows = 0; 7471 if (nrank<redprocs) { 7472 lrows = size/redprocs; 7473 if (nrank<size%redprocs) lrows++; 7474 } 7475 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7476 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7477 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7478 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7479 row = nrank; 7480 ncols = xadj[1]-xadj[0]; 7481 cols = adjncy; 7482 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7483 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7484 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7485 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7486 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7487 ierr = PetscFree(xadj);CHKERRQ(ierr); 7488 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7489 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7490 ierr = PetscFree(vals);CHKERRQ(ierr); 7491 if (use_vwgt) { 7492 Vec v; 7493 const PetscScalar *array; 7494 PetscInt nl; 7495 7496 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7497 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7498 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7499 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7500 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7501 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7502 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7503 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7504 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7505 ierr = VecDestroy(&v);CHKERRQ(ierr); 7506 } 7507 } else { 7508 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7509 if (use_vwgt) { 7510 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7511 v_wgt[0] = n; 7512 } 7513 } 7514 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7515 7516 /* Partition */ 7517 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7518 #if defined(PETSC_HAVE_PTSCOTCH) 7519 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);CHKERRQ(ierr); 7520 #elif defined(PETSC_HAVE_PARMETIS) 7521 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);CHKERRQ(ierr); 7522 #else 7523 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);CHKERRQ(ierr); 7524 #endif 7525 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7526 if (v_wgt) { 7527 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7528 } 7529 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7530 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7531 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7532 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7533 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7534 7535 /* renumber new_ranks to avoid "holes" in new set of processors */ 7536 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7537 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7538 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7539 if (!aggregate) { 7540 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7541 if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7542 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7543 } else if (oldranks) { 7544 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7545 } else { 7546 ranks_send_to_idx[0] = is_indices[0]; 7547 } 7548 } else { 7549 PetscInt idx = 0; 7550 PetscMPIInt tag; 7551 MPI_Request *reqs; 7552 7553 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7554 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7555 for (i=rstart;i<rend;i++) { 7556 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRMPI(ierr); 7557 } 7558 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRMPI(ierr); 7559 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 7560 ierr = PetscFree(reqs);CHKERRQ(ierr); 7561 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7562 if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7563 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7564 } else if (oldranks) { 7565 ranks_send_to_idx[0] = oldranks[idx]; 7566 } else { 7567 ranks_send_to_idx[0] = idx; 7568 } 7569 } 7570 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7571 /* clean up */ 7572 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7573 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7574 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7575 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7576 } 7577 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7578 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7579 7580 /* assemble parallel IS for sends */ 7581 i = 1; 7582 if (!color) i=0; 7583 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7584 PetscFunctionReturn(0); 7585 } 7586 7587 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7588 7589 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7590 { 7591 Mat local_mat; 7592 IS is_sends_internal; 7593 PetscInt rows,cols,new_local_rows; 7594 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7595 PetscBool ismatis,isdense,newisdense,destroy_mat; 7596 ISLocalToGlobalMapping l2gmap; 7597 PetscInt* l2gmap_indices; 7598 const PetscInt* is_indices; 7599 MatType new_local_type; 7600 /* buffers */ 7601 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7602 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7603 PetscInt *recv_buffer_idxs_local; 7604 PetscScalar *ptr_vals,*recv_buffer_vals; 7605 const PetscScalar *send_buffer_vals; 7606 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7607 /* MPI */ 7608 MPI_Comm comm,comm_n; 7609 PetscSubcomm subcomm; 7610 PetscMPIInt n_sends,n_recvs,size; 7611 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7612 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7613 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7614 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7615 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7616 PetscErrorCode ierr; 7617 7618 PetscFunctionBegin; 7619 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7620 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7621 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7622 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7623 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7624 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7625 PetscValidLogicalCollectiveBool(mat,reuse,6); 7626 PetscValidLogicalCollectiveInt(mat,nis,8); 7627 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7628 if (nvecs) { 7629 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7630 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7631 } 7632 /* further checks */ 7633 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7634 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7635 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7636 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7637 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7638 if (reuse && *mat_n) { 7639 PetscInt mrows,mcols,mnrows,mncols; 7640 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7641 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7642 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7643 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7644 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7645 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7646 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7647 } 7648 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7649 PetscValidLogicalCollectiveInt(mat,bs,0); 7650 7651 /* prepare IS for sending if not provided */ 7652 if (!is_sends) { 7653 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7654 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7655 } else { 7656 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7657 is_sends_internal = is_sends; 7658 } 7659 7660 /* get comm */ 7661 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7662 7663 /* compute number of sends */ 7664 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7665 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7666 7667 /* compute number of receives */ 7668 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 7669 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 7670 ierr = PetscArrayzero(iflags,size);CHKERRQ(ierr); 7671 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7672 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7673 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7674 ierr = PetscFree(iflags);CHKERRQ(ierr); 7675 7676 /* restrict comm if requested */ 7677 subcomm = NULL; 7678 destroy_mat = PETSC_FALSE; 7679 if (restrict_comm) { 7680 PetscMPIInt color,subcommsize; 7681 7682 color = 0; 7683 if (restrict_full) { 7684 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7685 } else { 7686 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7687 } 7688 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7689 subcommsize = size - subcommsize; 7690 /* check if reuse has been requested */ 7691 if (reuse) { 7692 if (*mat_n) { 7693 PetscMPIInt subcommsize2; 7694 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRMPI(ierr); 7695 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7696 comm_n = PetscObjectComm((PetscObject)*mat_n); 7697 } else { 7698 comm_n = PETSC_COMM_SELF; 7699 } 7700 } else { /* MAT_INITIAL_MATRIX */ 7701 PetscMPIInt rank; 7702 7703 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 7704 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7705 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7706 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7707 comm_n = PetscSubcommChild(subcomm); 7708 } 7709 /* flag to destroy *mat_n if not significative */ 7710 if (color) destroy_mat = PETSC_TRUE; 7711 } else { 7712 comm_n = comm; 7713 } 7714 7715 /* prepare send/receive buffers */ 7716 ierr = PetscMalloc1(size,&ilengths_idxs);CHKERRQ(ierr); 7717 ierr = PetscArrayzero(ilengths_idxs,size);CHKERRQ(ierr); 7718 ierr = PetscMalloc1(size,&ilengths_vals);CHKERRQ(ierr); 7719 ierr = PetscArrayzero(ilengths_vals,size);CHKERRQ(ierr); 7720 if (nis) { 7721 ierr = PetscCalloc1(size,&ilengths_idxs_is);CHKERRQ(ierr); 7722 } 7723 7724 /* Get data from local matrices */ 7725 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7726 /* TODO: See below some guidelines on how to prepare the local buffers */ 7727 /* 7728 send_buffer_vals should contain the raw values of the local matrix 7729 send_buffer_idxs should contain: 7730 - MatType_PRIVATE type 7731 - PetscInt size_of_l2gmap 7732 - PetscInt global_row_indices[size_of_l2gmap] 7733 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7734 */ 7735 else { 7736 ierr = MatDenseGetArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7737 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7738 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7739 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7740 send_buffer_idxs[1] = i; 7741 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7742 ierr = PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);CHKERRQ(ierr); 7743 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7744 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7745 for (i=0;i<n_sends;i++) { 7746 ilengths_vals[is_indices[i]] = len*len; 7747 ilengths_idxs[is_indices[i]] = len+2; 7748 } 7749 } 7750 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7751 /* additional is (if any) */ 7752 if (nis) { 7753 PetscMPIInt psum; 7754 PetscInt j; 7755 for (j=0,psum=0;j<nis;j++) { 7756 PetscInt plen; 7757 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7758 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7759 psum += len+1; /* indices + lenght */ 7760 } 7761 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7762 for (j=0,psum=0;j<nis;j++) { 7763 PetscInt plen; 7764 const PetscInt *is_array_idxs; 7765 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7766 send_buffer_idxs_is[psum] = plen; 7767 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7768 ierr = PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);CHKERRQ(ierr); 7769 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7770 psum += plen+1; /* indices + lenght */ 7771 } 7772 for (i=0;i<n_sends;i++) { 7773 ilengths_idxs_is[is_indices[i]] = psum; 7774 } 7775 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7776 } 7777 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7778 7779 buf_size_idxs = 0; 7780 buf_size_vals = 0; 7781 buf_size_idxs_is = 0; 7782 buf_size_vecs = 0; 7783 for (i=0;i<n_recvs;i++) { 7784 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7785 buf_size_vals += (PetscInt)olengths_vals[i]; 7786 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7787 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7788 } 7789 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7790 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7791 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7792 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7793 7794 /* get new tags for clean communications */ 7795 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7796 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7797 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7798 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7799 7800 /* allocate for requests */ 7801 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7802 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7803 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7804 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7805 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7806 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7807 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7808 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7809 7810 /* communications */ 7811 ptr_idxs = recv_buffer_idxs; 7812 ptr_vals = recv_buffer_vals; 7813 ptr_idxs_is = recv_buffer_idxs_is; 7814 ptr_vecs = recv_buffer_vecs; 7815 for (i=0;i<n_recvs;i++) { 7816 source_dest = onodes[i]; 7817 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRMPI(ierr); 7818 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRMPI(ierr); 7819 ptr_idxs += olengths_idxs[i]; 7820 ptr_vals += olengths_vals[i]; 7821 if (nis) { 7822 source_dest = onodes_is[i]; 7823 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRMPI(ierr); 7824 ptr_idxs_is += olengths_idxs_is[i]; 7825 } 7826 if (nvecs) { 7827 source_dest = onodes[i]; 7828 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRMPI(ierr); 7829 ptr_vecs += olengths_idxs[i]-2; 7830 } 7831 } 7832 for (i=0;i<n_sends;i++) { 7833 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7834 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRMPI(ierr); 7835 ierr = MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRMPI(ierr); 7836 if (nis) { 7837 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRMPI(ierr); 7838 } 7839 if (nvecs) { 7840 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7841 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRMPI(ierr); 7842 } 7843 } 7844 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7845 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7846 7847 /* assemble new l2g map */ 7848 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 7849 ptr_idxs = recv_buffer_idxs; 7850 new_local_rows = 0; 7851 for (i=0;i<n_recvs;i++) { 7852 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7853 ptr_idxs += olengths_idxs[i]; 7854 } 7855 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7856 ptr_idxs = recv_buffer_idxs; 7857 new_local_rows = 0; 7858 for (i=0;i<n_recvs;i++) { 7859 ierr = PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));CHKERRQ(ierr); 7860 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7861 ptr_idxs += olengths_idxs[i]; 7862 } 7863 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7864 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7865 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7866 7867 /* infer new local matrix type from received local matrices type */ 7868 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7869 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7870 if (n_recvs) { 7871 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7872 ptr_idxs = recv_buffer_idxs; 7873 for (i=0;i<n_recvs;i++) { 7874 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7875 new_local_type_private = MATAIJ_PRIVATE; 7876 break; 7877 } 7878 ptr_idxs += olengths_idxs[i]; 7879 } 7880 switch (new_local_type_private) { 7881 case MATDENSE_PRIVATE: 7882 new_local_type = MATSEQAIJ; 7883 bs = 1; 7884 break; 7885 case MATAIJ_PRIVATE: 7886 new_local_type = MATSEQAIJ; 7887 bs = 1; 7888 break; 7889 case MATBAIJ_PRIVATE: 7890 new_local_type = MATSEQBAIJ; 7891 break; 7892 case MATSBAIJ_PRIVATE: 7893 new_local_type = MATSEQSBAIJ; 7894 break; 7895 default: 7896 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7897 } 7898 } else { /* by default, new_local_type is seqaij */ 7899 new_local_type = MATSEQAIJ; 7900 bs = 1; 7901 } 7902 7903 /* create MATIS object if needed */ 7904 if (!reuse) { 7905 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7906 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7907 } else { 7908 /* it also destroys the local matrices */ 7909 if (*mat_n) { 7910 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7911 } else { /* this is a fake object */ 7912 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7913 } 7914 } 7915 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7916 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7917 7918 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 7919 7920 /* Global to local map of received indices */ 7921 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7922 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7923 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7924 7925 /* restore attributes -> type of incoming data and its size */ 7926 buf_size_idxs = 0; 7927 for (i=0;i<n_recvs;i++) { 7928 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7929 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7930 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7931 } 7932 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7933 7934 /* set preallocation */ 7935 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7936 if (!newisdense) { 7937 PetscInt *new_local_nnz=NULL; 7938 7939 ptr_idxs = recv_buffer_idxs_local; 7940 if (n_recvs) { 7941 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7942 } 7943 for (i=0;i<n_recvs;i++) { 7944 PetscInt j; 7945 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7946 for (j=0;j<*(ptr_idxs+1);j++) { 7947 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7948 } 7949 } else { 7950 /* TODO */ 7951 } 7952 ptr_idxs += olengths_idxs[i]; 7953 } 7954 if (new_local_nnz) { 7955 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7956 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7957 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7958 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7959 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7960 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7961 } else { 7962 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7963 } 7964 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7965 } else { 7966 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7967 } 7968 7969 /* set values */ 7970 ptr_vals = recv_buffer_vals; 7971 ptr_idxs = recv_buffer_idxs_local; 7972 for (i=0;i<n_recvs;i++) { 7973 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7974 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7975 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7976 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7977 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7978 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7979 } else { 7980 /* TODO */ 7981 } 7982 ptr_idxs += olengths_idxs[i]; 7983 ptr_vals += olengths_vals[i]; 7984 } 7985 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7986 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7987 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7988 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7989 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7990 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7991 7992 #if 0 7993 if (!restrict_comm) { /* check */ 7994 Vec lvec,rvec; 7995 PetscReal infty_error; 7996 7997 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7998 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7999 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 8000 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 8001 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 8002 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8003 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 8004 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 8005 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 8006 } 8007 #endif 8008 8009 /* assemble new additional is (if any) */ 8010 if (nis) { 8011 PetscInt **temp_idxs,*count_is,j,psum; 8012 8013 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8014 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 8015 ptr_idxs = recv_buffer_idxs_is; 8016 psum = 0; 8017 for (i=0;i<n_recvs;i++) { 8018 for (j=0;j<nis;j++) { 8019 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8020 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8021 psum += plen; 8022 ptr_idxs += plen+1; /* shift pointer to received data */ 8023 } 8024 } 8025 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 8026 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 8027 for (i=1;i<nis;i++) { 8028 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 8029 } 8030 ierr = PetscArrayzero(count_is,nis);CHKERRQ(ierr); 8031 ptr_idxs = recv_buffer_idxs_is; 8032 for (i=0;i<n_recvs;i++) { 8033 for (j=0;j<nis;j++) { 8034 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8035 ierr = PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);CHKERRQ(ierr); 8036 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8037 ptr_idxs += plen+1; /* shift pointer to received data */ 8038 } 8039 } 8040 for (i=0;i<nis;i++) { 8041 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8042 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr); 8043 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8044 } 8045 ierr = PetscFree(count_is);CHKERRQ(ierr); 8046 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 8047 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 8048 } 8049 /* free workspace */ 8050 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 8051 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8052 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 8053 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8054 if (isdense) { 8055 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 8056 ierr = MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 8057 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 8058 } else { 8059 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 8060 } 8061 if (nis) { 8062 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8063 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 8064 } 8065 8066 if (nvecs) { 8067 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8068 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8069 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8070 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8071 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 8072 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 8073 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 8074 /* set values */ 8075 ptr_vals = recv_buffer_vecs; 8076 ptr_idxs = recv_buffer_idxs_local; 8077 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8078 for (i=0;i<n_recvs;i++) { 8079 PetscInt j; 8080 for (j=0;j<*(ptr_idxs+1);j++) { 8081 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8082 } 8083 ptr_idxs += olengths_idxs[i]; 8084 ptr_vals += olengths_idxs[i]-2; 8085 } 8086 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8087 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 8088 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 8089 } 8090 8091 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 8092 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 8093 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 8094 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 8095 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 8096 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 8097 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 8098 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 8099 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 8100 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 8101 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 8102 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 8103 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 8104 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 8105 ierr = PetscFree(onodes);CHKERRQ(ierr); 8106 if (nis) { 8107 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 8108 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 8109 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 8110 } 8111 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 8112 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 8113 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 8114 for (i=0;i<nis;i++) { 8115 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8116 } 8117 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8118 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8119 } 8120 *mat_n = NULL; 8121 } 8122 PetscFunctionReturn(0); 8123 } 8124 8125 /* temporary hack into ksp private data structure */ 8126 #include <petsc/private/kspimpl.h> 8127 8128 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8129 { 8130 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8131 PC_IS *pcis = (PC_IS*)pc->data; 8132 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8133 Mat coarsedivudotp = NULL; 8134 Mat coarseG,t_coarse_mat_is; 8135 MatNullSpace CoarseNullSpace = NULL; 8136 ISLocalToGlobalMapping coarse_islg; 8137 IS coarse_is,*isarray,corners; 8138 PetscInt i,im_active=-1,active_procs=-1; 8139 PetscInt nis,nisdofs,nisneu,nisvert; 8140 PetscInt coarse_eqs_per_proc; 8141 PC pc_temp; 8142 PCType coarse_pc_type; 8143 KSPType coarse_ksp_type; 8144 PetscBool multilevel_requested,multilevel_allowed; 8145 PetscBool coarse_reuse; 8146 PetscInt ncoarse,nedcfield; 8147 PetscBool compute_vecs = PETSC_FALSE; 8148 PetscScalar *array; 8149 MatReuse coarse_mat_reuse; 8150 PetscBool restr, full_restr, have_void; 8151 PetscMPIInt size; 8152 PetscErrorCode ierr; 8153 8154 PetscFunctionBegin; 8155 ierr = PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8156 /* Assign global numbering to coarse dofs */ 8157 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8158 PetscInt ocoarse_size; 8159 compute_vecs = PETSC_TRUE; 8160 8161 pcbddc->new_primal_space = PETSC_TRUE; 8162 ocoarse_size = pcbddc->coarse_size; 8163 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 8164 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 8165 /* see if we can avoid some work */ 8166 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8167 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8168 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8169 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 8170 coarse_reuse = PETSC_FALSE; 8171 } else { /* we can safely reuse already computed coarse matrix */ 8172 coarse_reuse = PETSC_TRUE; 8173 } 8174 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8175 coarse_reuse = PETSC_FALSE; 8176 } 8177 /* reset any subassembling information */ 8178 if (!coarse_reuse || pcbddc->recompute_topography) { 8179 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8180 } 8181 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8182 coarse_reuse = PETSC_TRUE; 8183 } 8184 if (coarse_reuse && pcbddc->coarse_ksp) { 8185 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 8186 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 8187 coarse_mat_reuse = MAT_REUSE_MATRIX; 8188 } else { 8189 coarse_mat = NULL; 8190 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8191 } 8192 8193 /* creates temporary l2gmap and IS for coarse indexes */ 8194 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 8195 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 8196 8197 /* creates temporary MATIS object for coarse matrix */ 8198 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 8199 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 8200 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 8201 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8202 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8203 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 8204 8205 /* count "active" (i.e. with positive local size) and "void" processes */ 8206 im_active = !!(pcis->n); 8207 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8208 8209 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8210 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8211 /* full_restr : just use the receivers from the subassembling pattern */ 8212 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr); 8213 coarse_mat_is = NULL; 8214 multilevel_allowed = PETSC_FALSE; 8215 multilevel_requested = PETSC_FALSE; 8216 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8217 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8218 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8219 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8220 if (multilevel_requested) { 8221 ncoarse = active_procs/pcbddc->coarsening_ratio; 8222 restr = PETSC_FALSE; 8223 full_restr = PETSC_FALSE; 8224 } else { 8225 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8226 restr = PETSC_TRUE; 8227 full_restr = PETSC_TRUE; 8228 } 8229 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8230 ncoarse = PetscMax(1,ncoarse); 8231 if (!pcbddc->coarse_subassembling) { 8232 if (pcbddc->coarsening_ratio > 1) { 8233 if (multilevel_requested) { 8234 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8235 } else { 8236 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8237 } 8238 } else { 8239 PetscMPIInt rank; 8240 8241 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr); 8242 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8243 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8244 } 8245 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8246 PetscInt psum; 8247 if (pcbddc->coarse_ksp) psum = 1; 8248 else psum = 0; 8249 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8250 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8251 } 8252 /* determine if we can go multilevel */ 8253 if (multilevel_requested) { 8254 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8255 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8256 } 8257 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8258 8259 /* dump subassembling pattern */ 8260 if (pcbddc->dbg_flag && multilevel_allowed) { 8261 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 8262 } 8263 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8264 nedcfield = -1; 8265 corners = NULL; 8266 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8267 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8268 const PetscInt *idxs; 8269 ISLocalToGlobalMapping tmap; 8270 8271 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8272 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 8273 /* allocate space for temporary storage */ 8274 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 8275 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 8276 /* allocate for IS array */ 8277 nisdofs = pcbddc->n_ISForDofsLocal; 8278 if (pcbddc->nedclocal) { 8279 if (pcbddc->nedfield > -1) { 8280 nedcfield = pcbddc->nedfield; 8281 } else { 8282 nedcfield = 0; 8283 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 8284 nisdofs = 1; 8285 } 8286 } 8287 nisneu = !!pcbddc->NeumannBoundariesLocal; 8288 nisvert = 0; /* nisvert is not used */ 8289 nis = nisdofs + nisneu + nisvert; 8290 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 8291 /* dofs splitting */ 8292 for (i=0;i<nisdofs;i++) { 8293 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 8294 if (nedcfield != i) { 8295 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 8296 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8297 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8298 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8299 } else { 8300 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 8301 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8302 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8303 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 8304 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8305 } 8306 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8307 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8308 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 8309 } 8310 /* neumann boundaries */ 8311 if (pcbddc->NeumannBoundariesLocal) { 8312 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 8313 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 8314 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8315 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8316 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8317 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8318 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 8319 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 8320 } 8321 /* coordinates */ 8322 if (pcbddc->corner_selected) { 8323 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8324 ierr = ISGetLocalSize(corners,&tsize);CHKERRQ(ierr); 8325 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8326 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8327 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout); 8328 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8329 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8330 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8331 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);CHKERRQ(ierr); 8332 } 8333 ierr = PetscFree(tidxs);CHKERRQ(ierr); 8334 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 8335 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 8336 } else { 8337 nis = 0; 8338 nisdofs = 0; 8339 nisneu = 0; 8340 nisvert = 0; 8341 isarray = NULL; 8342 } 8343 /* destroy no longer needed map */ 8344 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 8345 8346 /* subassemble */ 8347 if (multilevel_allowed) { 8348 Vec vp[1]; 8349 PetscInt nvecs = 0; 8350 PetscBool reuse,reuser; 8351 8352 if (coarse_mat) reuse = PETSC_TRUE; 8353 else reuse = PETSC_FALSE; 8354 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8355 vp[0] = NULL; 8356 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8357 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8358 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8359 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8360 nvecs = 1; 8361 8362 if (pcbddc->divudotp) { 8363 Mat B,loc_divudotp; 8364 Vec v,p; 8365 IS dummy; 8366 PetscInt np; 8367 8368 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8369 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8370 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8371 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8372 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8373 ierr = VecSet(p,1.);CHKERRQ(ierr); 8374 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8375 ierr = VecDestroy(&p);CHKERRQ(ierr); 8376 ierr = MatDestroy(&B);CHKERRQ(ierr); 8377 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8378 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8379 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8380 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8381 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8382 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8383 ierr = VecDestroy(&v);CHKERRQ(ierr); 8384 } 8385 } 8386 if (reuser) { 8387 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8388 } else { 8389 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8390 } 8391 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8392 PetscScalar *arraym; 8393 const PetscScalar *arrayv; 8394 PetscInt nl; 8395 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8396 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8397 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8398 ierr = VecGetArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8399 ierr = PetscArraycpy(arraym,arrayv,nl);CHKERRQ(ierr); 8400 ierr = VecRestoreArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8401 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8402 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8403 } else { 8404 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8405 } 8406 } else { 8407 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8408 } 8409 if (coarse_mat_is || coarse_mat) { 8410 if (!multilevel_allowed) { 8411 ierr = MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8412 } else { 8413 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8414 if (coarse_mat_is) { 8415 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8416 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8417 coarse_mat = coarse_mat_is; 8418 } 8419 } 8420 } 8421 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8422 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8423 8424 /* create local to global scatters for coarse problem */ 8425 if (compute_vecs) { 8426 PetscInt lrows; 8427 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8428 if (coarse_mat) { 8429 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8430 } else { 8431 lrows = 0; 8432 } 8433 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8434 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8435 ierr = VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);CHKERRQ(ierr); 8436 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8437 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8438 } 8439 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8440 8441 /* set defaults for coarse KSP and PC */ 8442 if (multilevel_allowed) { 8443 coarse_ksp_type = KSPRICHARDSON; 8444 coarse_pc_type = PCBDDC; 8445 } else { 8446 coarse_ksp_type = KSPPREONLY; 8447 coarse_pc_type = PCREDUNDANT; 8448 } 8449 8450 /* print some info if requested */ 8451 if (pcbddc->dbg_flag) { 8452 if (!multilevel_allowed) { 8453 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8454 if (multilevel_requested) { 8455 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8456 } else if (pcbddc->max_levels) { 8457 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);CHKERRQ(ierr); 8458 } 8459 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8460 } 8461 } 8462 8463 /* communicate coarse discrete gradient */ 8464 coarseG = NULL; 8465 if (pcbddc->nedcG && multilevel_allowed) { 8466 MPI_Comm ccomm; 8467 if (coarse_mat) { 8468 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8469 } else { 8470 ccomm = MPI_COMM_NULL; 8471 } 8472 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8473 } 8474 8475 /* create the coarse KSP object only once with defaults */ 8476 if (coarse_mat) { 8477 PetscBool isredundant,isbddc,force,valid; 8478 PetscViewer dbg_viewer = NULL; 8479 8480 if (pcbddc->dbg_flag) { 8481 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8482 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8483 } 8484 if (!pcbddc->coarse_ksp) { 8485 char prefix[256],str_level[16]; 8486 size_t len; 8487 8488 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8489 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8490 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8491 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8492 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8493 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8494 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8495 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8496 /* TODO is this logic correct? should check for coarse_mat type */ 8497 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8498 /* prefix */ 8499 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8500 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8501 if (!pcbddc->current_level) { 8502 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8503 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8504 } else { 8505 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8506 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8507 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8508 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8509 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8510 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8511 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8512 } 8513 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8514 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8515 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8516 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8517 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8518 /* allow user customization */ 8519 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8520 /* get some info after set from options */ 8521 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8522 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8523 force = PETSC_FALSE; 8524 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8525 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8526 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8527 if (multilevel_allowed && !force && !valid) { 8528 isbddc = PETSC_TRUE; 8529 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8530 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8531 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8532 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8533 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8534 ierr = PetscObjectOptionsBegin((PetscObject)pc_temp);CHKERRQ(ierr); 8535 ierr = (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);CHKERRQ(ierr); 8536 ierr = PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);CHKERRQ(ierr); 8537 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8538 pc_temp->setfromoptionscalled++; 8539 } 8540 } 8541 } 8542 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8543 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8544 if (nisdofs) { 8545 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8546 for (i=0;i<nisdofs;i++) { 8547 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8548 } 8549 } 8550 if (nisneu) { 8551 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8552 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8553 } 8554 if (nisvert) { 8555 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8556 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8557 } 8558 if (coarseG) { 8559 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8560 } 8561 8562 /* get some info after set from options */ 8563 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8564 8565 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8566 if (isbddc && !multilevel_allowed) { 8567 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8568 } 8569 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8570 force = PETSC_FALSE; 8571 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8572 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8573 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8574 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8575 } 8576 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8577 if (isredundant) { 8578 KSP inner_ksp; 8579 PC inner_pc; 8580 8581 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8582 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8583 } 8584 8585 /* parameters which miss an API */ 8586 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8587 if (isbddc) { 8588 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8589 8590 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8591 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8592 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8593 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8594 if (pcbddc_coarse->benign_saddle_point) { 8595 Mat coarsedivudotp_is; 8596 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8597 IS row,col; 8598 const PetscInt *gidxs; 8599 PetscInt n,st,M,N; 8600 8601 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8602 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRMPI(ierr); 8603 st = st-n; 8604 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8605 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8606 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8607 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8608 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8609 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8610 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8611 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8612 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8613 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8614 ierr = ISDestroy(&row);CHKERRQ(ierr); 8615 ierr = ISDestroy(&col);CHKERRQ(ierr); 8616 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8617 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8618 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8619 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8620 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8621 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8622 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8623 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8624 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8625 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8626 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8627 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8628 } 8629 } 8630 8631 /* propagate symmetry info of coarse matrix */ 8632 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8633 if (pc->pmat->symmetric_set) { 8634 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8635 } 8636 if (pc->pmat->hermitian_set) { 8637 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8638 } 8639 if (pc->pmat->spd_set) { 8640 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8641 } 8642 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8643 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8644 } 8645 /* set operators */ 8646 ierr = MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");CHKERRQ(ierr); 8647 ierr = MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 8648 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8649 if (pcbddc->dbg_flag) { 8650 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8651 } 8652 } 8653 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8654 ierr = PetscFree(isarray);CHKERRQ(ierr); 8655 #if 0 8656 { 8657 PetscViewer viewer; 8658 char filename[256]; 8659 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8660 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8661 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8662 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8663 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8664 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8665 } 8666 #endif 8667 8668 if (corners) { 8669 Vec gv; 8670 IS is; 8671 const PetscInt *idxs; 8672 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8673 PetscScalar *coords; 8674 8675 if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8676 ierr = VecGetSize(pcbddc->coarse_vec,&N);CHKERRQ(ierr); 8677 ierr = VecGetLocalSize(pcbddc->coarse_vec,&n);CHKERRQ(ierr); 8678 ierr = VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);CHKERRQ(ierr); 8679 ierr = VecSetBlockSize(gv,cdim);CHKERRQ(ierr); 8680 ierr = VecSetSizes(gv,n*cdim,N*cdim);CHKERRQ(ierr); 8681 ierr = VecSetType(gv,VECSTANDARD);CHKERRQ(ierr); 8682 ierr = VecSetFromOptions(gv);CHKERRQ(ierr); 8683 ierr = VecSet(gv,PETSC_MAX_REAL);CHKERRQ(ierr); /* we only propagate coordinates from vertices constraints */ 8684 8685 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8686 ierr = ISGetLocalSize(is,&n);CHKERRQ(ierr); 8687 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 8688 ierr = PetscMalloc1(n*cdim,&coords);CHKERRQ(ierr); 8689 for (i=0;i<n;i++) { 8690 for (d=0;d<cdim;d++) { 8691 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8692 } 8693 } 8694 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 8695 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8696 8697 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 8698 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8699 ierr = VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);CHKERRQ(ierr); 8700 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8701 ierr = PetscFree(coords);CHKERRQ(ierr); 8702 ierr = VecAssemblyBegin(gv);CHKERRQ(ierr); 8703 ierr = VecAssemblyEnd(gv);CHKERRQ(ierr); 8704 ierr = VecGetArray(gv,&coords);CHKERRQ(ierr); 8705 if (pcbddc->coarse_ksp) { 8706 PC coarse_pc; 8707 PetscBool isbddc; 8708 8709 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 8710 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 8711 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8712 PetscReal *realcoords; 8713 8714 ierr = VecGetLocalSize(gv,&n);CHKERRQ(ierr); 8715 #if defined(PETSC_USE_COMPLEX) 8716 ierr = PetscMalloc1(n,&realcoords);CHKERRQ(ierr); 8717 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8718 #else 8719 realcoords = coords; 8720 #endif 8721 ierr = PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);CHKERRQ(ierr); 8722 #if defined(PETSC_USE_COMPLEX) 8723 ierr = PetscFree(realcoords);CHKERRQ(ierr); 8724 #endif 8725 } 8726 } 8727 ierr = VecRestoreArray(gv,&coords);CHKERRQ(ierr); 8728 ierr = VecDestroy(&gv);CHKERRQ(ierr); 8729 } 8730 ierr = ISDestroy(&corners);CHKERRQ(ierr); 8731 8732 if (pcbddc->coarse_ksp) { 8733 Vec crhs,csol; 8734 8735 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8736 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8737 if (!csol) { 8738 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8739 } 8740 if (!crhs) { 8741 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8742 } 8743 } 8744 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8745 8746 /* compute null space for coarse solver if the benign trick has been requested */ 8747 if (pcbddc->benign_null) { 8748 8749 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8750 for (i=0;i<pcbddc->benign_n;i++) { 8751 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8752 } 8753 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8754 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8755 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8756 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8757 if (coarse_mat) { 8758 Vec nullv; 8759 PetscScalar *array,*array2; 8760 PetscInt nl; 8761 8762 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8763 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8764 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8765 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8766 ierr = PetscArraycpy(array2,array,nl);CHKERRQ(ierr); 8767 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8768 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8769 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8770 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8771 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8772 } 8773 } 8774 ierr = PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8775 8776 ierr = PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8777 if (pcbddc->coarse_ksp) { 8778 PetscBool ispreonly; 8779 8780 if (CoarseNullSpace) { 8781 PetscBool isnull; 8782 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8783 if (isnull) { 8784 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8785 } 8786 /* TODO: add local nullspaces (if any) */ 8787 } 8788 /* setup coarse ksp */ 8789 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8790 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8791 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8792 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8793 KSP check_ksp; 8794 KSPType check_ksp_type; 8795 PC check_pc; 8796 Vec check_vec,coarse_vec; 8797 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8798 PetscInt its; 8799 PetscBool compute_eigs; 8800 PetscReal *eigs_r,*eigs_c; 8801 PetscInt neigs; 8802 const char *prefix; 8803 8804 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8805 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8806 ierr = PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);CHKERRQ(ierr); 8807 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 8808 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8809 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8810 /* prevent from setup unneeded object */ 8811 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8812 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8813 if (ispreonly) { 8814 check_ksp_type = KSPPREONLY; 8815 compute_eigs = PETSC_FALSE; 8816 } else { 8817 check_ksp_type = KSPGMRES; 8818 compute_eigs = PETSC_TRUE; 8819 } 8820 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8821 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8822 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8823 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8824 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8825 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8826 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8827 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8828 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8829 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8830 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8831 /* create random vec */ 8832 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8833 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8834 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8835 /* solve coarse problem */ 8836 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8837 ierr = KSPCheckSolve(check_ksp,pc,coarse_vec);CHKERRQ(ierr); 8838 /* set eigenvalue estimation if preonly has not been requested */ 8839 if (compute_eigs) { 8840 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8841 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8842 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8843 if (neigs) { 8844 lambda_max = eigs_r[neigs-1]; 8845 lambda_min = eigs_r[0]; 8846 if (pcbddc->use_coarse_estimates) { 8847 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8848 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8849 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8850 } 8851 } 8852 } 8853 } 8854 8855 /* check coarse problem residual error */ 8856 if (pcbddc->dbg_flag) { 8857 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8858 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8859 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8860 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8861 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8862 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8863 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8864 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8865 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8866 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8867 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8868 if (CoarseNullSpace) { 8869 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8870 } 8871 if (compute_eigs) { 8872 PetscReal lambda_max_s,lambda_min_s; 8873 KSPConvergedReason reason; 8874 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8875 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8876 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8877 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8878 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8879 for (i=0;i<neigs;i++) { 8880 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8881 } 8882 } 8883 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8884 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8885 } 8886 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8887 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8888 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8889 if (compute_eigs) { 8890 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8891 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8892 } 8893 } 8894 } 8895 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8896 /* print additional info */ 8897 if (pcbddc->dbg_flag) { 8898 /* waits until all processes reaches this point */ 8899 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8900 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);CHKERRQ(ierr); 8901 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8902 } 8903 8904 /* free memory */ 8905 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8906 ierr = PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8907 PetscFunctionReturn(0); 8908 } 8909 8910 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8911 { 8912 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8913 PC_IS* pcis = (PC_IS*)pc->data; 8914 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8915 IS subset,subset_mult,subset_n; 8916 PetscInt local_size,coarse_size=0; 8917 PetscInt *local_primal_indices=NULL; 8918 const PetscInt *t_local_primal_indices; 8919 PetscErrorCode ierr; 8920 8921 PetscFunctionBegin; 8922 /* Compute global number of coarse dofs */ 8923 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8924 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8925 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8926 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8927 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8928 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8929 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8930 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8931 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8932 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8933 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8934 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8935 ierr = PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);CHKERRQ(ierr); 8936 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8937 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8938 8939 /* check numbering */ 8940 if (pcbddc->dbg_flag) { 8941 PetscScalar coarsesum,*array,*array2; 8942 PetscInt i; 8943 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8944 8945 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8946 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8947 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8948 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8949 /* counter */ 8950 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8951 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8952 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8953 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8954 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8955 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8956 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8957 for (i=0;i<pcbddc->local_primal_size;i++) { 8958 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8959 } 8960 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8961 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8962 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8963 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8964 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8965 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8966 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8967 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8968 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8969 for (i=0;i<pcis->n;i++) { 8970 if (array[i] != 0.0 && array[i] != array2[i]) { 8971 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8972 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8973 set_error = PETSC_TRUE; 8974 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8975 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8976 } 8977 } 8978 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8979 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8980 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8981 for (i=0;i<pcis->n;i++) { 8982 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8983 } 8984 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8985 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8986 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8987 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8988 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8989 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8990 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8991 PetscInt *gidxs; 8992 8993 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8994 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8995 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8996 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8998 for (i=0;i<pcbddc->local_primal_size;i++) { 8999 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 9000 } 9001 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9002 ierr = PetscFree(gidxs);CHKERRQ(ierr); 9003 } 9004 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9005 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9006 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 9007 } 9008 9009 /* get back data */ 9010 *coarse_size_n = coarse_size; 9011 *local_primal_indices_n = local_primal_indices; 9012 PetscFunctionReturn(0); 9013 } 9014 9015 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 9016 { 9017 IS localis_t; 9018 PetscInt i,lsize,*idxs,n; 9019 PetscScalar *vals; 9020 PetscErrorCode ierr; 9021 9022 PetscFunctionBegin; 9023 /* get indices in local ordering exploiting local to global map */ 9024 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 9025 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 9026 for (i=0;i<lsize;i++) vals[i] = 1.0; 9027 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9028 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 9029 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 9030 if (idxs) { /* multilevel guard */ 9031 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 9032 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 9033 } 9034 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 9035 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9036 ierr = PetscFree(vals);CHKERRQ(ierr); 9037 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 9038 /* now compute set in local ordering */ 9039 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9040 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9041 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9042 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 9043 for (i=0,lsize=0;i<n;i++) { 9044 if (PetscRealPart(vals[i]) > 0.5) { 9045 lsize++; 9046 } 9047 } 9048 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 9049 for (i=0,lsize=0;i<n;i++) { 9050 if (PetscRealPart(vals[i]) > 0.5) { 9051 idxs[lsize++] = i; 9052 } 9053 } 9054 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9055 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 9056 *localis = localis_t; 9057 PetscFunctionReturn(0); 9058 } 9059 9060 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9061 { 9062 PC_IS *pcis=(PC_IS*)pc->data; 9063 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9064 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9065 Mat S_j; 9066 PetscInt *used_xadj,*used_adjncy; 9067 PetscBool free_used_adj; 9068 PetscErrorCode ierr; 9069 9070 PetscFunctionBegin; 9071 ierr = PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9072 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9073 free_used_adj = PETSC_FALSE; 9074 if (pcbddc->sub_schurs_layers == -1) { 9075 used_xadj = NULL; 9076 used_adjncy = NULL; 9077 } else { 9078 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9079 used_xadj = pcbddc->mat_graph->xadj; 9080 used_adjncy = pcbddc->mat_graph->adjncy; 9081 } else if (pcbddc->computed_rowadj) { 9082 used_xadj = pcbddc->mat_graph->xadj; 9083 used_adjncy = pcbddc->mat_graph->adjncy; 9084 } else { 9085 PetscBool flg_row=PETSC_FALSE; 9086 const PetscInt *xadj,*adjncy; 9087 PetscInt nvtxs; 9088 9089 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9090 if (flg_row) { 9091 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 9092 ierr = PetscArraycpy(used_xadj,xadj,nvtxs+1);CHKERRQ(ierr); 9093 ierr = PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);CHKERRQ(ierr); 9094 free_used_adj = PETSC_TRUE; 9095 } else { 9096 pcbddc->sub_schurs_layers = -1; 9097 used_xadj = NULL; 9098 used_adjncy = NULL; 9099 } 9100 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9101 } 9102 } 9103 9104 /* setup sub_schurs data */ 9105 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9106 if (!sub_schurs->schur_explicit) { 9107 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9108 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9109 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 9110 } else { 9111 Mat change = NULL; 9112 Vec scaling = NULL; 9113 IS change_primal = NULL, iP; 9114 PetscInt benign_n; 9115 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9116 PetscBool need_change = PETSC_FALSE; 9117 PetscBool discrete_harmonic = PETSC_FALSE; 9118 9119 if (!pcbddc->use_vertices && reuse_solvers) { 9120 PetscInt n_vertices; 9121 9122 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 9123 reuse_solvers = (PetscBool)!n_vertices; 9124 } 9125 if (!pcbddc->benign_change_explicit) { 9126 benign_n = pcbddc->benign_n; 9127 } else { 9128 benign_n = 0; 9129 } 9130 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9131 We need a global reduction to avoid possible deadlocks. 9132 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9133 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9134 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9135 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 9136 need_change = (PetscBool)(!need_change); 9137 } 9138 /* If the user defines additional constraints, we import them here. 9139 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 9140 if (need_change) { 9141 PC_IS *pcisf; 9142 PC_BDDC *pcbddcf; 9143 PC pcf; 9144 9145 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9146 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 9147 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 9148 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 9149 9150 /* hacks */ 9151 pcisf = (PC_IS*)pcf->data; 9152 pcisf->is_B_local = pcis->is_B_local; 9153 pcisf->vec1_N = pcis->vec1_N; 9154 pcisf->BtoNmap = pcis->BtoNmap; 9155 pcisf->n = pcis->n; 9156 pcisf->n_B = pcis->n_B; 9157 pcbddcf = (PC_BDDC*)pcf->data; 9158 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 9159 pcbddcf->mat_graph = pcbddc->mat_graph; 9160 pcbddcf->use_faces = PETSC_TRUE; 9161 pcbddcf->use_change_of_basis = PETSC_TRUE; 9162 pcbddcf->use_change_on_faces = PETSC_TRUE; 9163 pcbddcf->use_qr_single = PETSC_TRUE; 9164 pcbddcf->fake_change = PETSC_TRUE; 9165 9166 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 9167 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 9168 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 9169 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 9170 change = pcbddcf->ConstraintMatrix; 9171 pcbddcf->ConstraintMatrix = NULL; 9172 9173 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 9174 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 9175 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 9176 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 9177 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 9178 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 9179 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 9180 pcf->ops->destroy = NULL; 9181 pcf->ops->reset = NULL; 9182 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 9183 } 9184 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9185 9186 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 9187 if (iP) { 9188 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 9189 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 9190 ierr = PetscOptionsEnd();CHKERRQ(ierr); 9191 } 9192 if (discrete_harmonic) { 9193 Mat A; 9194 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 9195 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 9196 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 9197 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9198 ierr = MatDestroy(&A);CHKERRQ(ierr); 9199 } else { 9200 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9201 } 9202 ierr = MatDestroy(&change);CHKERRQ(ierr); 9203 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 9204 } 9205 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9206 9207 /* free adjacency */ 9208 if (free_used_adj) { 9209 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 9210 } 9211 ierr = PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9212 PetscFunctionReturn(0); 9213 } 9214 9215 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9216 { 9217 PC_IS *pcis=(PC_IS*)pc->data; 9218 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9219 PCBDDCGraph graph; 9220 PetscErrorCode ierr; 9221 9222 PetscFunctionBegin; 9223 /* attach interface graph for determining subsets */ 9224 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9225 IS verticesIS,verticescomm; 9226 PetscInt vsize,*idxs; 9227 9228 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9229 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 9230 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9231 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 9232 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9233 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9234 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 9235 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 9236 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 9237 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 9238 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 9239 } else { 9240 graph = pcbddc->mat_graph; 9241 } 9242 /* print some info */ 9243 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9244 IS vertices; 9245 PetscInt nv,nedges,nfaces; 9246 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 9247 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9248 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 9249 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9250 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 9251 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);CHKERRQ(ierr); 9252 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);CHKERRQ(ierr); 9253 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);CHKERRQ(ierr); 9254 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9255 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9256 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9257 } 9258 9259 /* sub_schurs init */ 9260 if (!pcbddc->sub_schurs) { 9261 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 9262 } 9263 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 9264 9265 /* free graph struct */ 9266 if (pcbddc->sub_schurs_rebuild) { 9267 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 9268 } 9269 PetscFunctionReturn(0); 9270 } 9271 9272 PetscErrorCode PCBDDCCheckOperator(PC pc) 9273 { 9274 PC_IS *pcis=(PC_IS*)pc->data; 9275 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9276 PetscErrorCode ierr; 9277 9278 PetscFunctionBegin; 9279 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9280 IS zerodiag = NULL; 9281 Mat S_j,B0_B=NULL; 9282 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9283 PetscScalar *p0_check,*array,*array2; 9284 PetscReal norm; 9285 PetscInt i; 9286 9287 /* B0 and B0_B */ 9288 if (zerodiag) { 9289 IS dummy; 9290 9291 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 9292 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 9293 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 9294 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 9295 } 9296 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9297 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 9298 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 9299 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9300 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9301 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9302 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9303 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 9304 /* S_j */ 9305 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9306 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9307 9308 /* mimic vector in \widetilde{W}_\Gamma */ 9309 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 9310 /* continuous in primal space */ 9311 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 9312 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9313 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9314 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9315 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 9316 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9317 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9318 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9319 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9320 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9321 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9322 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9323 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 9324 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 9325 9326 /* assemble rhs for coarse problem */ 9327 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9328 /* local with Schur */ 9329 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 9330 if (zerodiag) { 9331 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9332 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9333 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9334 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 9335 } 9336 /* sum on primal nodes the local contributions */ 9337 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9338 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9339 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9340 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9341 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9342 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9343 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9344 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 9345 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9346 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9347 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9348 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9349 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9350 /* scale primal nodes (BDDC sums contibutions) */ 9351 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 9352 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9353 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9354 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9355 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9356 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9357 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9358 /* global: \widetilde{B0}_B w_\Gamma */ 9359 if (zerodiag) { 9360 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 9361 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9362 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9363 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9364 } 9365 /* BDDC */ 9366 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 9367 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 9368 9369 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 9370 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 9371 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 9372 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);CHKERRQ(ierr); 9373 for (i=0;i<pcbddc->benign_n;i++) { 9374 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));CHKERRQ(ierr); 9375 } 9376 ierr = PetscFree(p0_check);CHKERRQ(ierr); 9377 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 9378 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 9379 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 9380 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9381 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 9382 } 9383 PetscFunctionReturn(0); 9384 } 9385 9386 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9387 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9388 { 9389 Mat At; 9390 IS rows; 9391 PetscInt rst,ren; 9392 PetscErrorCode ierr; 9393 PetscLayout rmap; 9394 9395 PetscFunctionBegin; 9396 rst = ren = 0; 9397 if (ccomm != MPI_COMM_NULL) { 9398 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 9399 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 9400 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 9401 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 9402 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 9403 } 9404 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 9405 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 9406 ierr = ISDestroy(&rows);CHKERRQ(ierr); 9407 9408 if (ccomm != MPI_COMM_NULL) { 9409 Mat_MPIAIJ *a,*b; 9410 IS from,to; 9411 Vec gvec; 9412 PetscInt lsize; 9413 9414 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9415 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9416 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9417 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9418 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9419 a = (Mat_MPIAIJ*)At->data; 9420 b = (Mat_MPIAIJ*)(*B)->data; 9421 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRMPI(ierr); 9422 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRMPI(ierr); 9423 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9424 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9425 b->A = a->A; 9426 b->B = a->B; 9427 9428 b->donotstash = a->donotstash; 9429 b->roworiented = a->roworiented; 9430 b->rowindices = NULL; 9431 b->rowvalues = NULL; 9432 b->getrowactive = PETSC_FALSE; 9433 9434 (*B)->rmap = rmap; 9435 (*B)->factortype = A->factortype; 9436 (*B)->assembled = PETSC_TRUE; 9437 (*B)->insertmode = NOT_SET_VALUES; 9438 (*B)->preallocated = PETSC_TRUE; 9439 9440 if (a->colmap) { 9441 #if defined(PETSC_USE_CTABLE) 9442 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9443 #else 9444 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9445 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9446 ierr = PetscArraycpy(b->colmap,a->colmap,At->cmap->N);CHKERRQ(ierr); 9447 #endif 9448 } else b->colmap = NULL; 9449 if (a->garray) { 9450 PetscInt len; 9451 len = a->B->cmap->n; 9452 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9453 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9454 if (len) { ierr = PetscArraycpy(b->garray,a->garray,len);CHKERRQ(ierr); } 9455 } else b->garray = NULL; 9456 9457 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9458 b->lvec = a->lvec; 9459 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9460 9461 /* cannot use VecScatterCopy */ 9462 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9463 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9464 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9465 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9466 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9467 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9468 ierr = ISDestroy(&from);CHKERRQ(ierr); 9469 ierr = ISDestroy(&to);CHKERRQ(ierr); 9470 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9471 } 9472 ierr = MatDestroy(&At);CHKERRQ(ierr); 9473 PetscFunctionReturn(0); 9474 } 9475