1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 PetscErrorCode ierr; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 28 if (!nr || !nc) PetscFunctionReturn(0); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 33 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr,nc); 39 if (!rwork) { 40 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 50 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 51 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 #else 55 ierr = PetscMalloc1(5*n,&rwork2);CHKERRQ(ierr); 56 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr)); 57 ierr = PetscFree(rwork2);CHKERRQ(ierr); 58 #endif 59 ierr = PetscFPTrapPop();CHKERRQ(ierr); 60 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 61 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 62 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 63 if (!rwork) { 64 ierr = PetscFree(sing);CHKERRQ(ierr); 65 } 66 if (!work) { 67 ierr = PetscFree(uwork);CHKERRQ(ierr); 68 } 69 /* create B */ 70 if (!range) { 71 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 72 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 73 ierr = PetscArraycpy(data,U+nr*i,(nr-i)*nr);CHKERRQ(ierr); 74 } else { 75 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 76 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 77 ierr = PetscArraycpy(data,U,i*nr);CHKERRQ(ierr); 78 } 79 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 80 ierr = PetscFree(U);CHKERRQ(ierr); 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 const PetscScalar *vals; 122 PetscScalar v; 123 124 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 125 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 126 ierr = MatDenseGetArrayRead(GEd,&vals);CHKERRQ(ierr); 127 /* v = PetscAbsScalar(vals[0]) */; 128 v = 1.; 129 cvals[0] = vals[0]/v; 130 cvals[1] = vals[1]/v; 131 ierr = MatDenseRestoreArrayRead(GEd,&vals);CHKERRQ(ierr); 132 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 133 #if defined(PRINT_GDET) 134 { 135 PetscViewer viewer; 136 char filename[256]; 137 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 138 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 139 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 140 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 141 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 142 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 143 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 144 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 145 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 146 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 147 } 148 #endif 149 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 150 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 151 } 152 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode PCBDDCNedelecSupport(PC pc) 157 { 158 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 159 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 160 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 161 Vec tvec; 162 PetscSF sfv; 163 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 164 MPI_Comm comm; 165 IS lned,primals,allprimals,nedfieldlocal; 166 IS *eedges,*extrows,*extcols,*alleedges; 167 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 168 PetscScalar *vals,*work; 169 PetscReal *rwork; 170 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 171 PetscInt ne,nv,Lv,order,n,field; 172 PetscInt n_neigh,*neigh,*n_shared,**shared; 173 PetscInt i,j,extmem,cum,maxsize,nee; 174 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 175 PetscInt *sfvleaves,*sfvroots; 176 PetscInt *corners,*cedges; 177 PetscInt *ecount,**eneighs,*vcount,**vneighs; 178 PetscInt *emarks; 179 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 180 PetscErrorCode ierr; 181 182 PetscFunctionBegin; 183 /* If the discrete gradient is defined for a subset of dofs and global is true, 184 it assumes G is given in global ordering for all the dofs. 185 Otherwise, the ordering is global for the Nedelec field */ 186 order = pcbddc->nedorder; 187 conforming = pcbddc->conforming; 188 field = pcbddc->nedfield; 189 global = pcbddc->nedglobal; 190 setprimal = PETSC_FALSE; 191 print = PETSC_FALSE; 192 singular = PETSC_FALSE; 193 194 /* Command line customization */ 195 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 196 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 199 /* print debug info TODO: to be removed */ 200 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 201 ierr = PetscOptionsEnd();CHKERRQ(ierr); 202 203 /* Return if there are no edges in the decomposition and the problem is not singular */ 204 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 205 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 206 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 207 if (!singular) { 208 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 209 lrc[0] = PETSC_FALSE; 210 for (i=0;i<n;i++) { 211 if (PetscRealPart(vals[i]) > 2.) { 212 lrc[0] = PETSC_TRUE; 213 break; 214 } 215 } 216 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 217 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr); 218 if (!lrc[1]) PetscFunctionReturn(0); 219 } 220 221 /* Get Nedelec field */ 222 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 223 if (pcbddc->n_ISForDofsLocal && field >= 0) { 224 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 225 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 226 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 227 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 228 ne = n; 229 nedfieldlocal = NULL; 230 global = PETSC_TRUE; 231 } else if (field == PETSC_DECIDE) { 232 PetscInt rst,ren,*idx; 233 234 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 235 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 236 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 237 for (i=rst;i<ren;i++) { 238 PetscInt nc; 239 240 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 241 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 242 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 } 244 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 245 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 246 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 247 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 248 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 249 } else { 250 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 251 } 252 253 /* Sanity checks */ 254 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 255 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 256 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 257 258 /* Just set primal dofs and return */ 259 if (setprimal) { 260 IS enedfieldlocal; 261 PetscInt *eidxs; 262 263 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 264 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 265 if (nedfieldlocal) { 266 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 267 for (i=0,cum=0;i<ne;i++) { 268 if (PetscRealPart(vals[idxs[i]]) > 2.) { 269 eidxs[cum++] = idxs[i]; 270 } 271 } 272 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 273 } else { 274 for (i=0,cum=0;i<ne;i++) { 275 if (PetscRealPart(vals[i]) > 2.) { 276 eidxs[cum++] = i; 277 } 278 } 279 } 280 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 281 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 282 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 283 ierr = PetscFree(eidxs);CHKERRQ(ierr); 284 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 285 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 /* Compute some l2g maps */ 290 if (nedfieldlocal) { 291 IS is; 292 293 /* need to map from the local Nedelec field to local numbering */ 294 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 295 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 296 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 297 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 298 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 299 if (global) { 300 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 301 el2g = al2g; 302 } else { 303 IS gis; 304 305 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 306 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 307 ierr = ISDestroy(&gis);CHKERRQ(ierr); 308 } 309 ierr = ISDestroy(&is);CHKERRQ(ierr); 310 } else { 311 /* restore default */ 312 pcbddc->nedfield = -1; 313 /* one ref for the destruction of al2g, one for el2g */ 314 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 315 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 316 el2g = al2g; 317 fl2g = NULL; 318 } 319 320 /* Start communication to drop connections for interior edges (for cc analysis only) */ 321 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 322 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 323 if (nedfieldlocal) { 324 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 325 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 326 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 } else { 328 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 329 } 330 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 331 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 332 333 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 334 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 335 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 336 if (global) { 337 PetscInt rst; 338 339 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 340 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 341 if (matis->sf_rootdata[i] < 2) { 342 matis->sf_rootdata[cum++] = i + rst; 343 } 344 } 345 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 346 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 347 } else { 348 PetscInt *tbz; 349 350 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 351 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 352 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 353 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 354 for (i=0,cum=0;i<ne;i++) 355 if (matis->sf_leafdata[idxs[i]] == 1) 356 tbz[cum++] = i; 357 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 358 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 359 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 360 ierr = PetscFree(tbz);CHKERRQ(ierr); 361 } 362 } else { /* we need the entire G to infer the nullspace */ 363 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 364 G = pcbddc->discretegradient; 365 } 366 367 /* Extract subdomain relevant rows of G */ 368 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 369 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 370 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 371 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 372 ierr = ISDestroy(&lned);CHKERRQ(ierr); 373 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 374 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 375 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 376 377 /* SF for nodal dofs communications */ 378 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 379 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 380 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 382 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 384 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 386 i = singular ? 2 : 1; 387 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 388 389 /* Destroy temporary G created in MATIS format and modified G */ 390 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 391 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 392 ierr = MatDestroy(&G);CHKERRQ(ierr); 393 394 if (print) { 395 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 396 ierr = MatView(lG,NULL);CHKERRQ(ierr); 397 } 398 399 /* Save lG for values insertion in change of basis */ 400 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 401 402 /* Analyze the edge-nodes connections (duplicate lG) */ 403 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 404 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 405 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 409 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 410 /* need to import the boundary specification to ensure the 411 proper detection of coarse edges' endpoints */ 412 if (pcbddc->DirichletBoundariesLocal) { 413 IS is; 414 415 if (fl2g) { 416 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 417 } else { 418 is = pcbddc->DirichletBoundariesLocal; 419 } 420 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 421 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 422 for (i=0;i<cum;i++) { 423 if (idxs[i] >= 0) { 424 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 425 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 426 } 427 } 428 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 429 if (fl2g) { 430 ierr = ISDestroy(&is);CHKERRQ(ierr); 431 } 432 } 433 if (pcbddc->NeumannBoundariesLocal) { 434 IS is; 435 436 if (fl2g) { 437 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 438 } else { 439 is = pcbddc->NeumannBoundariesLocal; 440 } 441 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 442 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 443 for (i=0;i<cum;i++) { 444 if (idxs[i] >= 0) { 445 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 446 } 447 } 448 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 449 if (fl2g) { 450 ierr = ISDestroy(&is);CHKERRQ(ierr); 451 } 452 } 453 454 /* Count neighs per dof */ 455 ierr = ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 456 ierr = ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 457 458 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 459 for proper detection of coarse edges' endpoints */ 460 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 461 for (i=0;i<ne;i++) { 462 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 463 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 467 if (!conforming) { 468 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 469 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 470 } 471 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 472 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 473 cum = 0; 474 for (i=0;i<ne;i++) { 475 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 476 if (!PetscBTLookup(btee,i)) { 477 marks[cum++] = i; 478 continue; 479 } 480 /* set badly connected edge dofs as primal */ 481 if (!conforming) { 482 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 483 marks[cum++] = i; 484 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 485 for (j=ii[i];j<ii[i+1];j++) { 486 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 487 } 488 } else { 489 /* every edge dofs should be connected trough a certain number of nodal dofs 490 to other edge dofs belonging to coarse edges 491 - at most 2 endpoints 492 - order-1 interior nodal dofs 493 - no undefined nodal dofs (nconn < order) 494 */ 495 PetscInt ends = 0,ints = 0, undef = 0; 496 for (j=ii[i];j<ii[i+1];j++) { 497 PetscInt v = jj[j],k; 498 PetscInt nconn = iit[v+1]-iit[v]; 499 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 500 if (nconn > order) ends++; 501 else if (nconn == order) ints++; 502 else undef++; 503 } 504 if (undef || ends > 2 || ints != order -1) { 505 marks[cum++] = i; 506 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 507 for (j=ii[i];j<ii[i+1];j++) { 508 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 509 } 510 } 511 } 512 } 513 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 514 if (!order && ii[i+1] != ii[i]) { 515 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 516 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 517 } 518 } 519 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 520 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 521 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 if (!conforming) { 523 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 524 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 525 } 526 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 527 528 /* identify splitpoints and corner candidates */ 529 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 530 if (print) { 531 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 532 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 533 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 534 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 535 } 536 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 537 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 538 for (i=0;i<nv;i++) { 539 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 540 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 541 if (!order) { /* variable order */ 542 PetscReal vorder = 0.; 543 544 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 545 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 546 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 547 ord = 1; 548 } 549 if (PetscUnlikelyDebug(test%ord)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord); 550 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 551 if (PetscBTLookup(btbd,jj[j])) { 552 bdir = PETSC_TRUE; 553 break; 554 } 555 if (vc != ecount[jj[j]]) { 556 sneighs = PETSC_FALSE; 557 } else { 558 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 559 for (k=0;k<vc;k++) { 560 if (vn[k] != en[k]) { 561 sneighs = PETSC_FALSE; 562 break; 563 } 564 } 565 } 566 } 567 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 568 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 569 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 570 } else if (test == ord) { 571 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 572 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 573 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 574 } else { 575 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 576 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 577 } 578 } 579 } 580 ierr = ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 581 ierr = ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 582 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 583 584 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 585 if (order != 1) { 586 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 587 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 if (PetscBTLookup(btvcand,i)) { 590 PetscBool found = PETSC_FALSE; 591 for (j=ii[i];j<ii[i+1] && !found;j++) { 592 PetscInt k,e = jj[j]; 593 if (PetscBTLookup(bte,e)) continue; 594 for (k=iit[e];k<iit[e+1];k++) { 595 PetscInt v = jjt[k]; 596 if (v != i && PetscBTLookup(btvcand,v)) { 597 found = PETSC_TRUE; 598 break; 599 } 600 } 601 } 602 if (!found) { 603 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 604 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 605 } else { 606 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 607 } 608 } 609 } 610 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 611 } 612 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 613 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 614 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 615 616 /* Get the local G^T explicitly */ 617 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 618 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 619 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 620 621 /* Mark interior nodal dofs */ 622 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 623 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 624 for (i=1;i<n_neigh;i++) { 625 for (j=0;j<n_shared[i];j++) { 626 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 627 } 628 } 629 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 630 631 /* communicate corners and splitpoints */ 632 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 633 ierr = PetscArrayzero(sfvleaves,nv);CHKERRQ(ierr); 634 ierr = PetscArrayzero(sfvroots,Lv);CHKERRQ(ierr); 635 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 636 637 if (print) { 638 IS tbz; 639 640 cum = 0; 641 for (i=0;i<nv;i++) 642 if (sfvleaves[i]) 643 vmarks[cum++] = i; 644 645 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 646 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 647 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 648 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 649 } 650 651 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 652 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 653 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);CHKERRQ(ierr); 654 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);CHKERRQ(ierr); 655 656 /* Zero rows of lGt corresponding to identified corners 657 and interior nodal dofs */ 658 cum = 0; 659 for (i=0;i<nv;i++) { 660 if (sfvleaves[i]) { 661 vmarks[cum++] = i; 662 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 663 } 664 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 665 } 666 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 667 if (print) { 668 IS tbz; 669 670 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 671 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 672 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 673 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 674 } 675 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 676 ierr = PetscFree(vmarks);CHKERRQ(ierr); 677 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 678 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 679 680 /* Recompute G */ 681 ierr = MatDestroy(&lG);CHKERRQ(ierr); 682 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 683 if (print) { 684 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 685 ierr = MatView(lG,NULL);CHKERRQ(ierr); 686 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 687 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 688 } 689 690 /* Get primal dofs (if any) */ 691 cum = 0; 692 for (i=0;i<ne;i++) { 693 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 694 } 695 if (fl2g) { 696 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 697 } 698 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 699 if (print) { 700 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 701 ierr = ISView(primals,NULL);CHKERRQ(ierr); 702 } 703 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 704 /* TODO: what if the user passed in some of them ? */ 705 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 706 ierr = ISDestroy(&primals);CHKERRQ(ierr); 707 708 /* Compute edge connectivity */ 709 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 710 711 /* Symbolic conn = lG*lGt */ 712 ierr = MatProductCreate(lG,lGt,NULL,&conn);CHKERRQ(ierr); 713 ierr = MatProductSetType(conn,MATPRODUCT_AB);CHKERRQ(ierr); 714 ierr = MatProductSetAlgorithm(conn,"default");CHKERRQ(ierr); 715 ierr = MatProductSetFill(conn,PETSC_DEFAULT);CHKERRQ(ierr); 716 ierr = PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_");CHKERRQ(ierr); 717 ierr = MatProductSetFromOptions(conn);CHKERRQ(ierr); 718 ierr = MatProductSymbolic(conn);CHKERRQ(ierr); 719 720 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 721 if (fl2g) { 722 PetscBT btf; 723 PetscInt *iia,*jja,*iiu,*jju; 724 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 725 726 /* create CSR for all local dofs */ 727 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 728 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 729 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 730 iiu = pcbddc->mat_graph->xadj; 731 jju = pcbddc->mat_graph->adjncy; 732 } else if (pcbddc->use_local_adj) { 733 rest = PETSC_TRUE; 734 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 735 } else { 736 free = PETSC_TRUE; 737 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 738 iiu[0] = 0; 739 for (i=0;i<n;i++) { 740 iiu[i+1] = i+1; 741 jju[i] = -1; 742 } 743 } 744 745 /* import sizes of CSR */ 746 iia[0] = 0; 747 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 748 749 /* overwrite entries corresponding to the Nedelec field */ 750 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 751 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 752 for (i=0;i<ne;i++) { 753 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 754 iia[idxs[i]+1] = ii[i+1]-ii[i]; 755 } 756 757 /* iia in CSR */ 758 for (i=0;i<n;i++) iia[i+1] += iia[i]; 759 760 /* jja in CSR */ 761 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 762 for (i=0;i<n;i++) 763 if (!PetscBTLookup(btf,i)) 764 for (j=0;j<iiu[i+1]-iiu[i];j++) 765 jja[iia[i]+j] = jju[iiu[i]+j]; 766 767 /* map edge dofs connectivity */ 768 if (jj) { 769 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 770 for (i=0;i<ne;i++) { 771 PetscInt e = idxs[i]; 772 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 773 } 774 } 775 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 776 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 777 if (rest) { 778 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 779 } 780 if (free) { 781 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 782 } 783 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 784 } else { 785 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 786 } 787 788 /* Analyze interface for edge dofs */ 789 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 790 pcbddc->mat_graph->twodim = PETSC_FALSE; 791 792 /* Get coarse edges in the edge space */ 793 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 794 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 795 796 if (fl2g) { 797 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 798 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 799 for (i=0;i<nee;i++) { 800 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 801 } 802 } else { 803 eedges = alleedges; 804 primals = allprimals; 805 } 806 807 /* Mark fine edge dofs with their coarse edge id */ 808 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 809 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 810 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 811 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 812 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 813 if (print) { 814 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 815 ierr = ISView(primals,NULL);CHKERRQ(ierr); 816 } 817 818 maxsize = 0; 819 for (i=0;i<nee;i++) { 820 PetscInt size,mark = i+1; 821 822 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 823 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 824 for (j=0;j<size;j++) marks[idxs[j]] = mark; 825 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 826 maxsize = PetscMax(maxsize,size); 827 } 828 829 /* Find coarse edge endpoints */ 830 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 831 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 832 for (i=0;i<nee;i++) { 833 PetscInt mark = i+1,size; 834 835 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 836 if (!size && nedfieldlocal) continue; 837 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 838 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 839 if (print) { 840 ierr = PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);CHKERRQ(ierr); 841 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 842 } 843 for (j=0;j<size;j++) { 844 PetscInt k, ee = idxs[j]; 845 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 846 for (k=ii[ee];k<ii[ee+1];k++) { 847 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 848 if (PetscBTLookup(btv,jj[k])) { 849 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 850 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 851 PetscInt k2; 852 PetscBool corner = PETSC_FALSE; 853 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 854 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 855 /* it's a corner if either is connected with an edge dof belonging to a different cc or 856 if the edge dof lie on the natural part of the boundary */ 857 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 858 corner = PETSC_TRUE; 859 break; 860 } 861 } 862 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 863 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 864 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 865 } else { 866 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 867 } 868 } 869 } 870 } 871 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 872 } 873 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 874 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 875 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 876 877 /* Reset marked primal dofs */ 878 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 879 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 880 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 881 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 882 883 /* Now use the initial lG */ 884 ierr = MatDestroy(&lG);CHKERRQ(ierr); 885 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 886 lG = lGinit; 887 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 888 889 /* Compute extended cols indices */ 890 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 891 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 892 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 893 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 894 i *= maxsize; 895 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 896 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 897 eerr = PETSC_FALSE; 898 for (i=0;i<nee;i++) { 899 PetscInt size,found = 0; 900 901 cum = 0; 902 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 903 if (!size && nedfieldlocal) continue; 904 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 905 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 906 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 907 for (j=0;j<size;j++) { 908 PetscInt k,ee = idxs[j]; 909 for (k=ii[ee];k<ii[ee+1];k++) { 910 PetscInt vv = jj[k]; 911 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 912 else if (!PetscBTLookupSet(btvc,vv)) found++; 913 } 914 } 915 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 916 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 917 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 918 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 919 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 920 /* it may happen that endpoints are not defined at this point 921 if it is the case, mark this edge for a second pass */ 922 if (cum != size -1 || found != 2) { 923 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 924 if (print) { 925 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 926 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 927 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 928 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 929 } 930 eerr = PETSC_TRUE; 931 } 932 } 933 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 934 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr); 935 if (done) { 936 PetscInt *newprimals; 937 938 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 939 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 940 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 941 ierr = PetscArraycpy(newprimals,idxs,cum);CHKERRQ(ierr); 942 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 944 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 945 for (i=0;i<nee;i++) { 946 PetscBool has_candidates = PETSC_FALSE; 947 if (PetscBTLookup(bter,i)) { 948 PetscInt size,mark = i+1; 949 950 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 951 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 952 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 953 for (j=0;j<size;j++) { 954 PetscInt k,ee = idxs[j]; 955 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 956 for (k=ii[ee];k<ii[ee+1];k++) { 957 /* set all candidates located on the edge as corners */ 958 if (PetscBTLookup(btvcand,jj[k])) { 959 PetscInt k2,vv = jj[k]; 960 has_candidates = PETSC_TRUE; 961 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 962 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 963 /* set all edge dofs connected to candidate as primals */ 964 for (k2=iit[vv];k2<iit[vv+1];k2++) { 965 if (marks[jjt[k2]] == mark) { 966 PetscInt k3,ee2 = jjt[k2]; 967 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 968 newprimals[cum++] = ee2; 969 /* finally set the new corners */ 970 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 971 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 972 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 973 } 974 } 975 } 976 } else { 977 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 978 } 979 } 980 } 981 if (!has_candidates) { /* circular edge */ 982 PetscInt k, ee = idxs[0],*tmarks; 983 984 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 985 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 986 for (k=ii[ee];k<ii[ee+1];k++) { 987 PetscInt k2; 988 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 989 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 990 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 991 } 992 for (j=0;j<size;j++) { 993 if (tmarks[idxs[j]] > 1) { 994 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 995 newprimals[cum++] = idxs[j]; 996 } 997 } 998 ierr = PetscFree(tmarks);CHKERRQ(ierr); 999 } 1000 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1001 } 1002 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1003 } 1004 ierr = PetscFree(extcols);CHKERRQ(ierr); 1005 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1006 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1007 if (fl2g) { 1008 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1009 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1010 for (i=0;i<nee;i++) { 1011 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1012 } 1013 ierr = PetscFree(eedges);CHKERRQ(ierr); 1014 } 1015 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1016 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1017 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1018 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1019 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1020 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1021 pcbddc->mat_graph->twodim = PETSC_FALSE; 1022 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1023 if (fl2g) { 1024 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1025 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1026 for (i=0;i<nee;i++) { 1027 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1028 } 1029 } else { 1030 eedges = alleedges; 1031 primals = allprimals; 1032 } 1033 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1034 1035 /* Mark again */ 1036 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 1037 for (i=0;i<nee;i++) { 1038 PetscInt size,mark = i+1; 1039 1040 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1041 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1042 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1043 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1044 } 1045 if (print) { 1046 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1047 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1048 } 1049 1050 /* Recompute extended cols */ 1051 eerr = PETSC_FALSE; 1052 for (i=0;i<nee;i++) { 1053 PetscInt size; 1054 1055 cum = 0; 1056 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1057 if (!size && nedfieldlocal) continue; 1058 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1059 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1060 for (j=0;j<size;j++) { 1061 PetscInt k,ee = idxs[j]; 1062 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1063 } 1064 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1065 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1066 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1067 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1068 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1069 if (cum != size -1) { 1070 if (print) { 1071 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1072 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1073 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1074 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1075 } 1076 eerr = PETSC_TRUE; 1077 } 1078 } 1079 } 1080 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1081 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1082 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1083 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1084 /* an error should not occur at this point */ 1085 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1086 1087 /* Check the number of endpoints */ 1088 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1089 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1090 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1091 for (i=0;i<nee;i++) { 1092 PetscInt size, found = 0, gc[2]; 1093 1094 /* init with defaults */ 1095 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1096 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1097 if (!size && nedfieldlocal) continue; 1098 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1099 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1100 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1101 for (j=0;j<size;j++) { 1102 PetscInt k,ee = idxs[j]; 1103 for (k=ii[ee];k<ii[ee+1];k++) { 1104 PetscInt vv = jj[k]; 1105 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1106 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1107 corners[i*2+found++] = vv; 1108 } 1109 } 1110 } 1111 if (found != 2) { 1112 PetscInt e; 1113 if (fl2g) { 1114 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1115 } else { 1116 e = idxs[0]; 1117 } 1118 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1119 } 1120 1121 /* get primal dof index on this coarse edge */ 1122 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1123 if (gc[0] > gc[1]) { 1124 PetscInt swap = corners[2*i]; 1125 corners[2*i] = corners[2*i+1]; 1126 corners[2*i+1] = swap; 1127 } 1128 cedges[i] = idxs[size-1]; 1129 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1130 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1131 } 1132 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1134 1135 if (PetscDefined(USE_DEBUG)) { 1136 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1137 not interfere with neighbouring coarse edges */ 1138 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1139 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 for (i=0;i<nv;i++) { 1141 PetscInt emax = 0,eemax = 0; 1142 1143 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1144 ierr = PetscArrayzero(emarks,nee+1);CHKERRQ(ierr); 1145 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1146 for (j=1;j<nee+1;j++) { 1147 if (emax < emarks[j]) { 1148 emax = emarks[j]; 1149 eemax = j; 1150 } 1151 } 1152 /* not relevant for edges */ 1153 if (!eemax) continue; 1154 1155 for (j=ii[i];j<ii[i+1];j++) { 1156 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1157 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1158 } 1159 } 1160 } 1161 ierr = PetscFree(emarks);CHKERRQ(ierr); 1162 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1163 } 1164 1165 /* Compute extended rows indices for edge blocks of the change of basis */ 1166 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1167 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1168 extmem *= maxsize; 1169 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1170 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1171 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1172 for (i=0;i<nv;i++) { 1173 PetscInt mark = 0,size,start; 1174 1175 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1176 for (j=ii[i];j<ii[i+1];j++) 1177 if (marks[jj[j]] && !mark) 1178 mark = marks[jj[j]]; 1179 1180 /* not relevant */ 1181 if (!mark) continue; 1182 1183 /* import extended row */ 1184 mark--; 1185 start = mark*extmem+extrowcum[mark]; 1186 size = ii[i+1]-ii[i]; 1187 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1188 ierr = PetscArraycpy(extrow+start,jj+ii[i],size);CHKERRQ(ierr); 1189 extrowcum[mark] += size; 1190 } 1191 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1192 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1193 ierr = PetscFree(marks);CHKERRQ(ierr); 1194 1195 /* Compress extrows */ 1196 cum = 0; 1197 for (i=0;i<nee;i++) { 1198 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1199 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1200 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1201 cum = PetscMax(cum,size); 1202 } 1203 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1204 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1205 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1206 1207 /* Workspace for lapack inner calls and VecSetValues */ 1208 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1209 1210 /* Create change of basis matrix (preallocation can be improved) */ 1211 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1212 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1213 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1214 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1215 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1216 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1217 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1218 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1219 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1220 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1221 1222 /* Defaults to identity */ 1223 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1224 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1225 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1226 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1227 1228 /* Create discrete gradient for the coarser level if needed */ 1229 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1230 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1231 if (pcbddc->current_level < pcbddc->max_levels) { 1232 ISLocalToGlobalMapping cel2g,cvl2g; 1233 IS wis,gwis; 1234 PetscInt cnv,cne; 1235 1236 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1237 if (fl2g) { 1238 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1239 } else { 1240 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1241 pcbddc->nedclocal = wis; 1242 } 1243 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1244 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1245 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1246 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1247 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1248 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1249 1250 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1251 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1252 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1253 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1254 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1255 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1256 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1257 1258 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1259 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1260 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1261 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1262 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1263 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1264 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1265 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1266 } 1267 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1268 1269 #if defined(PRINT_GDET) 1270 inc = 0; 1271 lev = pcbddc->current_level; 1272 #endif 1273 1274 /* Insert values in the change of basis matrix */ 1275 for (i=0;i<nee;i++) { 1276 Mat Gins = NULL, GKins = NULL; 1277 IS cornersis = NULL; 1278 PetscScalar cvals[2]; 1279 1280 if (pcbddc->nedcG) { 1281 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1282 } 1283 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1284 if (Gins && GKins) { 1285 const PetscScalar *data; 1286 const PetscInt *rows,*cols; 1287 PetscInt nrh,nch,nrc,ncc; 1288 1289 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1290 /* H1 */ 1291 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1292 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1293 ierr = MatDenseGetArrayRead(Gins,&data);CHKERRQ(ierr); 1294 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1295 ierr = MatDenseRestoreArrayRead(Gins,&data);CHKERRQ(ierr); 1296 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1297 /* complement */ 1298 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1299 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1300 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1301 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1302 ierr = MatDenseGetArrayRead(GKins,&data);CHKERRQ(ierr); 1303 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1304 ierr = MatDenseRestoreArrayRead(GKins,&data);CHKERRQ(ierr); 1305 1306 /* coarse discrete gradient */ 1307 if (pcbddc->nedcG) { 1308 PetscInt cols[2]; 1309 1310 cols[0] = 2*i; 1311 cols[1] = 2*i+1; 1312 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1313 } 1314 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1315 } 1316 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1317 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1318 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1319 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1320 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1321 } 1322 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1323 1324 /* Start assembling */ 1325 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1326 if (pcbddc->nedcG) { 1327 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1328 } 1329 1330 /* Free */ 1331 if (fl2g) { 1332 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1333 for (i=0;i<nee;i++) { 1334 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1335 } 1336 ierr = PetscFree(eedges);CHKERRQ(ierr); 1337 } 1338 1339 /* hack mat_graph with primal dofs on the coarse edges */ 1340 { 1341 PCBDDCGraph graph = pcbddc->mat_graph; 1342 PetscInt *oqueue = graph->queue; 1343 PetscInt *ocptr = graph->cptr; 1344 PetscInt ncc,*idxs; 1345 1346 /* find first primal edge */ 1347 if (pcbddc->nedclocal) { 1348 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1349 } else { 1350 if (fl2g) { 1351 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1352 } 1353 idxs = cedges; 1354 } 1355 cum = 0; 1356 while (cum < nee && cedges[cum] < 0) cum++; 1357 1358 /* adapt connected components */ 1359 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1360 graph->cptr[0] = 0; 1361 for (i=0,ncc=0;i<graph->ncc;i++) { 1362 PetscInt lc = ocptr[i+1]-ocptr[i]; 1363 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1364 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1365 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1366 ncc++; 1367 lc--; 1368 cum++; 1369 while (cum < nee && cedges[cum] < 0) cum++; 1370 } 1371 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1372 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1373 ncc++; 1374 } 1375 graph->ncc = ncc; 1376 if (pcbddc->nedclocal) { 1377 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1378 } 1379 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1380 } 1381 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1382 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1384 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1385 1386 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1387 ierr = PetscFree(extrow);CHKERRQ(ierr); 1388 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1389 ierr = PetscFree(corners);CHKERRQ(ierr); 1390 ierr = PetscFree(cedges);CHKERRQ(ierr); 1391 ierr = PetscFree(extrows);CHKERRQ(ierr); 1392 ierr = PetscFree(extcols);CHKERRQ(ierr); 1393 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1394 1395 /* Complete assembling */ 1396 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1397 if (pcbddc->nedcG) { 1398 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1399 #if 0 1400 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1401 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1402 #endif 1403 } 1404 1405 /* set change of basis */ 1406 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1407 ierr = MatDestroy(&T);CHKERRQ(ierr); 1408 1409 PetscFunctionReturn(0); 1410 } 1411 1412 /* the near-null space of BDDC carries information on quadrature weights, 1413 and these can be collinear -> so cheat with MatNullSpaceCreate 1414 and create a suitable set of basis vectors first */ 1415 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1416 { 1417 PetscErrorCode ierr; 1418 PetscInt i; 1419 1420 PetscFunctionBegin; 1421 for (i=0;i<nvecs;i++) { 1422 PetscInt first,last; 1423 1424 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1425 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1426 if (i>=first && i < last) { 1427 PetscScalar *data; 1428 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1429 if (!has_const) { 1430 data[i-first] = 1.; 1431 } else { 1432 data[2*i-first] = 1./PetscSqrtReal(2.); 1433 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1434 } 1435 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1436 } 1437 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1438 } 1439 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1440 for (i=0;i<nvecs;i++) { /* reset vectors */ 1441 PetscInt first,last; 1442 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1443 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1444 if (i>=first && i < last) { 1445 PetscScalar *data; 1446 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1447 if (!has_const) { 1448 data[i-first] = 0.; 1449 } else { 1450 data[2*i-first] = 0.; 1451 data[2*i-first+1] = 0.; 1452 } 1453 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1454 } 1455 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1456 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1457 } 1458 PetscFunctionReturn(0); 1459 } 1460 1461 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1462 { 1463 Mat loc_divudotp; 1464 Vec p,v,vins,quad_vec,*quad_vecs; 1465 ISLocalToGlobalMapping map; 1466 PetscScalar *vals; 1467 const PetscScalar *array; 1468 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1469 PetscInt n_neigh,*neigh,*n_shared,**shared; 1470 PetscMPIInt rank; 1471 PetscErrorCode ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1475 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1476 ierr = MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 1477 if (!maxneighs) { 1478 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1479 *nnsp = NULL; 1480 PetscFunctionReturn(0); 1481 } 1482 maxsize = 0; 1483 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1484 ierr = PetscMalloc2(maxsize,&gidxs,maxsize,&vals);CHKERRQ(ierr); 1485 /* create vectors to hold quadrature weights */ 1486 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1487 if (!transpose) { 1488 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1489 } else { 1490 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1491 } 1492 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1493 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1494 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1495 for (i=0;i<maxneighs;i++) { 1496 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1497 } 1498 1499 /* compute local quad vec */ 1500 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1501 if (!transpose) { 1502 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1503 } else { 1504 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1505 } 1506 ierr = VecSet(p,1.);CHKERRQ(ierr); 1507 if (!transpose) { 1508 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1509 } else { 1510 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1511 } 1512 if (vl2l) { 1513 Mat lA; 1514 VecScatter sc; 1515 1516 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1517 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1518 ierr = VecScatterCreate(v,NULL,vins,vl2l,&sc);CHKERRQ(ierr); 1519 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1522 } else { 1523 vins = v; 1524 } 1525 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1526 ierr = VecDestroy(&p);CHKERRQ(ierr); 1527 1528 /* insert in global quadrature vecs */ 1529 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 1530 for (i=1;i<n_neigh;i++) { 1531 const PetscInt *idxs; 1532 PetscInt idx,nn,j; 1533 1534 idxs = shared[i]; 1535 nn = n_shared[i]; 1536 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1537 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1538 idx = -(idx+1); 1539 if (idx < 0 || idx >= maxneighs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %D not in [0,%D)",idx,maxneighs); 1540 ierr = ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);CHKERRQ(ierr); 1541 ierr = VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1542 } 1543 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1544 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1545 if (vl2l) { 1546 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1547 } 1548 ierr = VecDestroy(&v);CHKERRQ(ierr); 1549 ierr = PetscFree2(gidxs,vals);CHKERRQ(ierr); 1550 1551 /* assemble near null space */ 1552 for (i=0;i<maxneighs;i++) { 1553 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1554 } 1555 for (i=0;i<maxneighs;i++) { 1556 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1557 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1558 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1559 } 1560 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1561 PetscFunctionReturn(0); 1562 } 1563 1564 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1565 { 1566 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1567 PetscErrorCode ierr; 1568 1569 PetscFunctionBegin; 1570 if (primalv) { 1571 if (pcbddc->user_primal_vertices_local) { 1572 IS list[2], newp; 1573 1574 list[0] = primalv; 1575 list[1] = pcbddc->user_primal_vertices_local; 1576 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1577 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1578 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1579 pcbddc->user_primal_vertices_local = newp; 1580 } else { 1581 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1582 } 1583 } 1584 PetscFunctionReturn(0); 1585 } 1586 1587 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1588 { 1589 PetscInt f, *comp = (PetscInt *)ctx; 1590 1591 PetscFunctionBegin; 1592 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1593 PetscFunctionReturn(0); 1594 } 1595 1596 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1597 { 1598 PetscErrorCode ierr; 1599 Vec local,global; 1600 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1601 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1602 PetscBool monolithic = PETSC_FALSE; 1603 1604 PetscFunctionBegin; 1605 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1606 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1607 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1608 /* need to convert from global to local topology information and remove references to information in global ordering */ 1609 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1610 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1611 ierr = VecBindToCPU(global,PETSC_TRUE);CHKERRQ(ierr); 1612 ierr = VecBindToCPU(local,PETSC_TRUE);CHKERRQ(ierr); 1613 if (monolithic) { /* just get block size to properly compute vertices */ 1614 if (pcbddc->vertex_size == 1) { 1615 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1616 } 1617 goto boundary; 1618 } 1619 1620 if (pcbddc->user_provided_isfordofs) { 1621 if (pcbddc->n_ISForDofs) { 1622 PetscInt i; 1623 1624 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1625 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1626 PetscInt bs; 1627 1628 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1629 ierr = ISGetBlockSize(pcbddc->ISForDofs[i],&bs);CHKERRQ(ierr); 1630 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1631 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1632 } 1633 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1634 pcbddc->n_ISForDofs = 0; 1635 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1636 } 1637 } else { 1638 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1639 DM dm; 1640 1641 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1642 if (!dm) { 1643 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1644 } 1645 if (dm) { 1646 IS *fields; 1647 PetscInt nf,i; 1648 1649 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1650 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1651 for (i=0;i<nf;i++) { 1652 PetscInt bs; 1653 1654 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1655 ierr = ISGetBlockSize(fields[i],&bs);CHKERRQ(ierr); 1656 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1657 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1658 } 1659 ierr = PetscFree(fields);CHKERRQ(ierr); 1660 pcbddc->n_ISForDofsLocal = nf; 1661 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1662 PetscContainer c; 1663 1664 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1665 if (c) { 1666 MatISLocalFields lf; 1667 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1668 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1669 } else { /* fallback, create the default fields if bs > 1 */ 1670 PetscInt i, n = matis->A->rmap->n; 1671 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1672 if (i > 1) { 1673 pcbddc->n_ISForDofsLocal = i; 1674 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1675 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1676 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1677 } 1678 } 1679 } 1680 } 1681 } else { 1682 PetscInt i; 1683 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1684 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1685 } 1686 } 1687 } 1688 1689 boundary: 1690 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1691 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1692 } else if (pcbddc->DirichletBoundariesLocal) { 1693 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1694 } 1695 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1696 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1697 } else if (pcbddc->NeumannBoundariesLocal) { 1698 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1699 } 1700 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1701 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1702 } 1703 ierr = VecDestroy(&global);CHKERRQ(ierr); 1704 ierr = VecDestroy(&local);CHKERRQ(ierr); 1705 /* detect local disconnected subdomains if requested (use matis->A) */ 1706 if (pcbddc->detect_disconnected) { 1707 IS primalv = NULL; 1708 PetscInt i; 1709 PetscBool filter = pcbddc->detect_disconnected_filter; 1710 1711 for (i=0;i<pcbddc->n_local_subs;i++) { 1712 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1713 } 1714 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1715 ierr = PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1716 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1717 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1718 } 1719 /* early stage corner detection */ 1720 { 1721 DM dm; 1722 1723 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1724 if (!dm) { 1725 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1726 } 1727 if (dm) { 1728 PetscBool isda; 1729 1730 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1731 if (isda) { 1732 ISLocalToGlobalMapping l2l; 1733 IS corners; 1734 Mat lA; 1735 PetscBool gl,lo; 1736 1737 { 1738 Vec cvec; 1739 const PetscScalar *coords; 1740 PetscInt dof,n,cdim; 1741 PetscBool memc = PETSC_TRUE; 1742 1743 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1744 ierr = DMGetCoordinates(dm,&cvec);CHKERRQ(ierr); 1745 ierr = VecGetLocalSize(cvec,&n);CHKERRQ(ierr); 1746 ierr = VecGetBlockSize(cvec,&cdim);CHKERRQ(ierr); 1747 n /= cdim; 1748 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 1749 ierr = PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);CHKERRQ(ierr); 1750 ierr = VecGetArrayRead(cvec,&coords);CHKERRQ(ierr); 1751 #if defined(PETSC_USE_COMPLEX) 1752 memc = PETSC_FALSE; 1753 #endif 1754 if (dof != 1) memc = PETSC_FALSE; 1755 if (memc) { 1756 ierr = PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);CHKERRQ(ierr); 1757 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1758 PetscReal *bcoords = pcbddc->mat_graph->coords; 1759 PetscInt i, b, d; 1760 1761 for (i=0;i<n;i++) { 1762 for (b=0;b<dof;b++) { 1763 for (d=0;d<cdim;d++) { 1764 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1765 } 1766 } 1767 } 1768 } 1769 ierr = VecRestoreArrayRead(cvec,&coords);CHKERRQ(ierr); 1770 pcbddc->mat_graph->cdim = cdim; 1771 pcbddc->mat_graph->cnloc = dof*n; 1772 pcbddc->mat_graph->cloc = PETSC_FALSE; 1773 } 1774 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1775 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1776 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1777 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1778 lo = (PetscBool)(l2l && corners); 1779 ierr = MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 1780 if (gl) { /* From PETSc's DMDA */ 1781 const PetscInt *idx; 1782 PetscInt dof,bs,*idxout,n; 1783 1784 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1785 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1786 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1787 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1788 if (bs == dof) { 1789 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1790 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1791 } else { /* the original DMDA local-to-local map have been modified */ 1792 PetscInt i,d; 1793 1794 ierr = PetscMalloc1(dof*n,&idxout);CHKERRQ(ierr); 1795 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1796 ierr = ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);CHKERRQ(ierr); 1797 1798 bs = 1; 1799 n *= dof; 1800 } 1801 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1802 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1803 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1804 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1805 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1806 pcbddc->corner_selected = PETSC_TRUE; 1807 pcbddc->corner_selection = PETSC_TRUE; 1808 } 1809 if (corners) { 1810 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1811 } 1812 } 1813 } 1814 } 1815 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1816 DM dm; 1817 1818 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1819 if (!dm) { 1820 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1821 } 1822 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1823 Vec vcoords; 1824 PetscSection section; 1825 PetscReal *coords; 1826 PetscInt d,cdim,nl,nf,**ctxs; 1827 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1828 1829 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1830 ierr = DMGetLocalSection(dm,§ion);CHKERRQ(ierr); 1831 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1832 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1833 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1834 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1835 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1836 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1837 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1838 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1839 for (d=0;d<cdim;d++) { 1840 PetscInt i; 1841 const PetscScalar *v; 1842 1843 for (i=0;i<nf;i++) ctxs[i][0] = d; 1844 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1845 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1846 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1847 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1848 } 1849 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1850 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1851 ierr = PetscFree(coords);CHKERRQ(ierr); 1852 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1853 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1854 } 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1860 { 1861 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1862 PetscErrorCode ierr; 1863 IS nis; 1864 const PetscInt *idxs; 1865 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1866 1867 PetscFunctionBegin; 1868 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1869 if (mop == MPI_LAND) { 1870 /* init rootdata with true */ 1871 for (i=0;i<pc->pmat->rmap->n;i++) matis->sf_rootdata[i] = 1; 1872 } else { 1873 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 1874 } 1875 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 1876 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1877 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1878 for (i=0;i<nd;i++) 1879 if (-1 < idxs[i] && idxs[i] < n) 1880 matis->sf_leafdata[idxs[i]] = 1; 1881 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1882 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1883 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1884 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 1885 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 1886 if (mop == MPI_LAND) { 1887 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1888 } else { 1889 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1890 } 1891 for (i=0,nnd=0;i<n;i++) 1892 if (matis->sf_leafdata[i]) 1893 nidxs[nnd++] = i; 1894 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1895 ierr = ISDestroy(is);CHKERRQ(ierr); 1896 *is = nis; 1897 PetscFunctionReturn(0); 1898 } 1899 1900 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1901 { 1902 PC_IS *pcis = (PC_IS*)(pc->data); 1903 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 if (!pcbddc->benign_have_null) { 1908 PetscFunctionReturn(0); 1909 } 1910 if (pcbddc->ChangeOfBasisMatrix) { 1911 Vec swap; 1912 1913 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1914 swap = pcbddc->work_change; 1915 pcbddc->work_change = r; 1916 r = swap; 1917 } 1918 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1919 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1920 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);CHKERRQ(ierr); 1921 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1922 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);CHKERRQ(ierr); 1923 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 1924 ierr = VecSet(z,0.);CHKERRQ(ierr); 1925 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1926 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1927 if (pcbddc->ChangeOfBasisMatrix) { 1928 pcbddc->work_change = r; 1929 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1930 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1931 } 1932 PetscFunctionReturn(0); 1933 } 1934 1935 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1936 { 1937 PCBDDCBenignMatMult_ctx ctx; 1938 PetscErrorCode ierr; 1939 PetscBool apply_right,apply_left,reset_x; 1940 1941 PetscFunctionBegin; 1942 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1943 if (transpose) { 1944 apply_right = ctx->apply_left; 1945 apply_left = ctx->apply_right; 1946 } else { 1947 apply_right = ctx->apply_right; 1948 apply_left = ctx->apply_left; 1949 } 1950 reset_x = PETSC_FALSE; 1951 if (apply_right) { 1952 const PetscScalar *ax; 1953 PetscInt nl,i; 1954 1955 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1956 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1957 ierr = PetscArraycpy(ctx->work,ax,nl);CHKERRQ(ierr); 1958 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1959 for (i=0;i<ctx->benign_n;i++) { 1960 PetscScalar sum,val; 1961 const PetscInt *idxs; 1962 PetscInt nz,j; 1963 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1964 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1965 sum = 0.; 1966 if (ctx->apply_p0) { 1967 val = ctx->work[idxs[nz-1]]; 1968 for (j=0;j<nz-1;j++) { 1969 sum += ctx->work[idxs[j]]; 1970 ctx->work[idxs[j]] += val; 1971 } 1972 } else { 1973 for (j=0;j<nz-1;j++) { 1974 sum += ctx->work[idxs[j]]; 1975 } 1976 } 1977 ctx->work[idxs[nz-1]] -= sum; 1978 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1979 } 1980 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1981 reset_x = PETSC_TRUE; 1982 } 1983 if (transpose) { 1984 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1985 } else { 1986 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1987 } 1988 if (reset_x) { 1989 ierr = VecResetArray(x);CHKERRQ(ierr); 1990 } 1991 if (apply_left) { 1992 PetscScalar *ay; 1993 PetscInt i; 1994 1995 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1996 for (i=0;i<ctx->benign_n;i++) { 1997 PetscScalar sum,val; 1998 const PetscInt *idxs; 1999 PetscInt nz,j; 2000 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2001 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2002 val = -ay[idxs[nz-1]]; 2003 if (ctx->apply_p0) { 2004 sum = 0.; 2005 for (j=0;j<nz-1;j++) { 2006 sum += ay[idxs[j]]; 2007 ay[idxs[j]] += val; 2008 } 2009 ay[idxs[nz-1]] += sum; 2010 } else { 2011 for (j=0;j<nz-1;j++) { 2012 ay[idxs[j]] += val; 2013 } 2014 ay[idxs[nz-1]] = 0.; 2015 } 2016 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2017 } 2018 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 2019 } 2020 PetscFunctionReturn(0); 2021 } 2022 2023 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2024 { 2025 PetscErrorCode ierr; 2026 2027 PetscFunctionBegin; 2028 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2029 PetscFunctionReturn(0); 2030 } 2031 2032 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2033 { 2034 PetscErrorCode ierr; 2035 2036 PetscFunctionBegin; 2037 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2038 PetscFunctionReturn(0); 2039 } 2040 2041 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2042 { 2043 PC_IS *pcis = (PC_IS*)pc->data; 2044 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2045 PCBDDCBenignMatMult_ctx ctx; 2046 PetscErrorCode ierr; 2047 2048 PetscFunctionBegin; 2049 if (!restore) { 2050 Mat A_IB,A_BI; 2051 PetscScalar *work; 2052 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2053 2054 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2055 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2056 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2057 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2058 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2059 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2060 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2061 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2062 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2063 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2064 ctx->apply_left = PETSC_TRUE; 2065 ctx->apply_right = PETSC_FALSE; 2066 ctx->apply_p0 = PETSC_FALSE; 2067 ctx->benign_n = pcbddc->benign_n; 2068 if (reuse) { 2069 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2070 ctx->free = PETSC_FALSE; 2071 } else { /* TODO: could be optimized for successive solves */ 2072 ISLocalToGlobalMapping N_to_D; 2073 PetscInt i; 2074 2075 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2076 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2077 for (i=0;i<pcbddc->benign_n;i++) { 2078 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2079 } 2080 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2081 ctx->free = PETSC_TRUE; 2082 } 2083 ctx->A = pcis->A_IB; 2084 ctx->work = work; 2085 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2086 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2087 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2088 pcis->A_IB = A_IB; 2089 2090 /* A_BI as A_IB^T */ 2091 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2092 pcbddc->benign_original_mat = pcis->A_BI; 2093 pcis->A_BI = A_BI; 2094 } else { 2095 if (!pcbddc->benign_original_mat) { 2096 PetscFunctionReturn(0); 2097 } 2098 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2099 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2100 pcis->A_IB = ctx->A; 2101 ctx->A = NULL; 2102 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2103 pcis->A_BI = pcbddc->benign_original_mat; 2104 pcbddc->benign_original_mat = NULL; 2105 if (ctx->free) { 2106 PetscInt i; 2107 for (i=0;i<ctx->benign_n;i++) { 2108 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2109 } 2110 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2111 } 2112 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2113 ierr = PetscFree(ctx);CHKERRQ(ierr); 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 /* used just in bddc debug mode */ 2119 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2120 { 2121 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2122 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2123 Mat An; 2124 PetscErrorCode ierr; 2125 2126 PetscFunctionBegin; 2127 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2128 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2129 if (is1) { 2130 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2131 ierr = MatDestroy(&An);CHKERRQ(ierr); 2132 } else { 2133 *B = An; 2134 } 2135 PetscFunctionReturn(0); 2136 } 2137 2138 /* TODO: add reuse flag */ 2139 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2140 { 2141 Mat Bt; 2142 PetscScalar *a,*bdata; 2143 const PetscInt *ii,*ij; 2144 PetscInt m,n,i,nnz,*bii,*bij; 2145 PetscBool flg_row; 2146 PetscErrorCode ierr; 2147 2148 PetscFunctionBegin; 2149 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2150 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2151 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2152 nnz = n; 2153 for (i=0;i<ii[n];i++) { 2154 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2155 } 2156 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2157 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2158 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2159 nnz = 0; 2160 bii[0] = 0; 2161 for (i=0;i<n;i++) { 2162 PetscInt j; 2163 for (j=ii[i];j<ii[i+1];j++) { 2164 PetscScalar entry = a[j]; 2165 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2166 bij[nnz] = ij[j]; 2167 bdata[nnz] = entry; 2168 nnz++; 2169 } 2170 } 2171 bii[i+1] = nnz; 2172 } 2173 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2174 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2175 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2176 { 2177 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2178 b->free_a = PETSC_TRUE; 2179 b->free_ij = PETSC_TRUE; 2180 } 2181 if (*B == A) { 2182 ierr = MatDestroy(&A);CHKERRQ(ierr); 2183 } 2184 *B = Bt; 2185 PetscFunctionReturn(0); 2186 } 2187 2188 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2189 { 2190 Mat B = NULL; 2191 DM dm; 2192 IS is_dummy,*cc_n; 2193 ISLocalToGlobalMapping l2gmap_dummy; 2194 PCBDDCGraph graph; 2195 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2196 PetscInt i,n; 2197 PetscInt *xadj,*adjncy; 2198 PetscBool isplex = PETSC_FALSE; 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 if (ncc) *ncc = 0; 2203 if (cc) *cc = NULL; 2204 if (primalv) *primalv = NULL; 2205 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2206 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2207 if (!dm) { 2208 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2209 } 2210 if (dm) { 2211 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2212 } 2213 if (filter) isplex = PETSC_FALSE; 2214 2215 if (isplex) { /* this code has been modified from plexpartition.c */ 2216 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2217 PetscInt *adj = NULL; 2218 IS cellNumbering; 2219 const PetscInt *cellNum; 2220 PetscBool useCone, useClosure; 2221 PetscSection section; 2222 PetscSegBuffer adjBuffer; 2223 PetscSF sfPoint; 2224 PetscErrorCode ierr; 2225 2226 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2227 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2228 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2229 /* Build adjacency graph via a section/segbuffer */ 2230 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2231 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2232 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2233 /* Always use FVM adjacency to create partitioner graph */ 2234 ierr = DMGetBasicAdjacency(dm, &useCone, &useClosure);CHKERRQ(ierr); 2235 ierr = DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);CHKERRQ(ierr); 2236 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2237 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2238 for (n = 0, p = pStart; p < pEnd; p++) { 2239 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2240 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2241 adjSize = PETSC_DETERMINE; 2242 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2243 for (a = 0; a < adjSize; ++a) { 2244 const PetscInt point = adj[a]; 2245 if (pStart <= point && point < pEnd) { 2246 PetscInt *PETSC_RESTRICT pBuf; 2247 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2248 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2249 *pBuf = point; 2250 } 2251 } 2252 n++; 2253 } 2254 ierr = DMSetBasicAdjacency(dm, useCone, useClosure);CHKERRQ(ierr); 2255 /* Derive CSR graph from section/segbuffer */ 2256 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2257 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2258 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2259 for (idx = 0, p = pStart; p < pEnd; p++) { 2260 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2261 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2262 } 2263 xadj[n] = size; 2264 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2265 /* Clean up */ 2266 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2267 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2268 ierr = PetscFree(adj);CHKERRQ(ierr); 2269 graph->xadj = xadj; 2270 graph->adjncy = adjncy; 2271 } else { 2272 Mat A; 2273 PetscBool isseqaij, flg_row; 2274 2275 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2276 if (!A->rmap->N || !A->cmap->N) { 2277 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2278 PetscFunctionReturn(0); 2279 } 2280 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2281 if (!isseqaij && filter) { 2282 PetscBool isseqdense; 2283 2284 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2285 if (!isseqdense) { 2286 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2287 } else { /* TODO: rectangular case and LDA */ 2288 PetscScalar *array; 2289 PetscReal chop=1.e-6; 2290 2291 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2292 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2293 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2294 for (i=0;i<n;i++) { 2295 PetscInt j; 2296 for (j=i+1;j<n;j++) { 2297 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2298 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2299 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2300 } 2301 } 2302 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2303 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2304 } 2305 } else { 2306 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2307 B = A; 2308 } 2309 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2310 2311 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2312 if (filter) { 2313 PetscScalar *data; 2314 PetscInt j,cum; 2315 2316 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2317 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2318 cum = 0; 2319 for (i=0;i<n;i++) { 2320 PetscInt t; 2321 2322 for (j=xadj[i];j<xadj[i+1];j++) { 2323 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2324 continue; 2325 } 2326 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2327 } 2328 t = xadj_filtered[i]; 2329 xadj_filtered[i] = cum; 2330 cum += t; 2331 } 2332 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2333 graph->xadj = xadj_filtered; 2334 graph->adjncy = adjncy_filtered; 2335 } else { 2336 graph->xadj = xadj; 2337 graph->adjncy = adjncy; 2338 } 2339 } 2340 /* compute local connected components using PCBDDCGraph */ 2341 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2342 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2343 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2344 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2345 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2346 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2347 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2348 2349 /* partial clean up */ 2350 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2351 if (B) { 2352 PetscBool flg_row; 2353 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2354 ierr = MatDestroy(&B);CHKERRQ(ierr); 2355 } 2356 if (isplex) { 2357 ierr = PetscFree(xadj);CHKERRQ(ierr); 2358 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2359 } 2360 2361 /* get back data */ 2362 if (isplex) { 2363 if (ncc) *ncc = graph->ncc; 2364 if (cc || primalv) { 2365 Mat A; 2366 PetscBT btv,btvt; 2367 PetscSection subSection; 2368 PetscInt *ids,cum,cump,*cids,*pids; 2369 2370 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2371 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2372 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2373 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2374 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2375 2376 cids[0] = 0; 2377 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2378 PetscInt j; 2379 2380 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2381 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2382 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2383 2384 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2385 for (k = 0; k < 2*size; k += 2) { 2386 PetscInt s, pp, p = closure[k], off, dof, cdof; 2387 2388 ierr = PetscSectionGetConstraintDof(subSection,p,&cdof);CHKERRQ(ierr); 2389 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2390 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2391 for (s = 0; s < dof-cdof; s++) { 2392 if (PetscBTLookupSet(btvt,off+s)) continue; 2393 if (!PetscBTLookup(btv,off+s)) { 2394 ids[cum++] = off+s; 2395 } else { /* cross-vertex */ 2396 pids[cump++] = off+s; 2397 } 2398 } 2399 ierr = DMPlexGetTreeParent(dm,p,&pp,NULL);CHKERRQ(ierr); 2400 if (pp != p) { 2401 ierr = PetscSectionGetConstraintDof(subSection,pp,&cdof);CHKERRQ(ierr); 2402 ierr = PetscSectionGetOffset(subSection,pp,&off);CHKERRQ(ierr); 2403 ierr = PetscSectionGetDof(subSection,pp,&dof);CHKERRQ(ierr); 2404 for (s = 0; s < dof-cdof; s++) { 2405 if (PetscBTLookupSet(btvt,off+s)) continue; 2406 if (!PetscBTLookup(btv,off+s)) { 2407 ids[cum++] = off+s; 2408 } else { /* cross-vertex */ 2409 pids[cump++] = off+s; 2410 } 2411 } 2412 } 2413 } 2414 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2415 } 2416 cids[i+1] = cum; 2417 /* mark dofs as already assigned */ 2418 for (j = cids[i]; j < cids[i+1]; j++) { 2419 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2420 } 2421 } 2422 if (cc) { 2423 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2424 for (i = 0; i < graph->ncc; i++) { 2425 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2426 } 2427 *cc = cc_n; 2428 } 2429 if (primalv) { 2430 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2431 } 2432 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2433 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2434 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2435 } 2436 } else { 2437 if (ncc) *ncc = graph->ncc; 2438 if (cc) { 2439 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2440 for (i=0;i<graph->ncc;i++) { 2441 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2442 } 2443 *cc = cc_n; 2444 } 2445 } 2446 /* clean up graph */ 2447 graph->xadj = NULL; 2448 graph->adjncy = NULL; 2449 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2454 { 2455 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2456 PC_IS* pcis = (PC_IS*)(pc->data); 2457 IS dirIS = NULL; 2458 PetscInt i; 2459 PetscErrorCode ierr; 2460 2461 PetscFunctionBegin; 2462 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2463 if (zerodiag) { 2464 Mat A; 2465 Vec vec3_N; 2466 PetscScalar *vals; 2467 const PetscInt *idxs; 2468 PetscInt nz,*count; 2469 2470 /* p0 */ 2471 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2472 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2473 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2474 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2475 for (i=0;i<nz;i++) vals[i] = 1.; 2476 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2477 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2478 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2479 /* v_I */ 2480 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2481 for (i=0;i<nz;i++) vals[i] = 0.; 2482 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2483 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2484 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2485 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2486 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2487 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2488 if (dirIS) { 2489 PetscInt n; 2490 2491 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2492 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2493 for (i=0;i<n;i++) vals[i] = 0.; 2494 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2495 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2496 } 2497 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2498 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2499 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2500 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2501 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2502 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2503 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2504 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2505 ierr = PetscFree(vals);CHKERRQ(ierr); 2506 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2507 2508 /* there should not be any pressure dofs lying on the interface */ 2509 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2510 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2511 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2512 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2513 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2514 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2515 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2516 ierr = PetscFree(count);CHKERRQ(ierr); 2517 } 2518 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2519 2520 /* check PCBDDCBenignGetOrSetP0 */ 2521 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2522 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2523 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2524 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2525 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2526 for (i=0;i<pcbddc->benign_n;i++) { 2527 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2528 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2529 } 2530 PetscFunctionReturn(0); 2531 } 2532 2533 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2534 { 2535 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2536 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2537 PetscInt nz,n,benign_n,bsp = 1; 2538 PetscInt *interior_dofs,n_interior_dofs,nneu; 2539 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2540 PetscErrorCode ierr; 2541 2542 PetscFunctionBegin; 2543 if (reuse) goto project_b0; 2544 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2545 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2546 for (n=0;n<pcbddc->benign_n;n++) { 2547 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2548 } 2549 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2550 has_null_pressures = PETSC_TRUE; 2551 have_null = PETSC_TRUE; 2552 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2553 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2554 Checks if all the pressure dofs in each subdomain have a zero diagonal 2555 If not, a change of basis on pressures is not needed 2556 since the local Schur complements are already SPD 2557 */ 2558 if (pcbddc->n_ISForDofsLocal) { 2559 IS iP = NULL; 2560 PetscInt p,*pp; 2561 PetscBool flg; 2562 2563 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);CHKERRQ(ierr); 2564 n = pcbddc->n_ISForDofsLocal; 2565 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2566 ierr = PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);CHKERRQ(ierr); 2567 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2568 if (!flg) { 2569 n = 1; 2570 pp[0] = pcbddc->n_ISForDofsLocal-1; 2571 } 2572 2573 bsp = 0; 2574 for (p=0;p<n;p++) { 2575 PetscInt bs; 2576 2577 if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]); 2578 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2579 bsp += bs; 2580 } 2581 ierr = PetscMalloc1(bsp,&bzerodiag);CHKERRQ(ierr); 2582 bsp = 0; 2583 for (p=0;p<n;p++) { 2584 const PetscInt *idxs; 2585 PetscInt b,bs,npl,*bidxs; 2586 2587 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2588 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);CHKERRQ(ierr); 2589 ierr = ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2590 ierr = PetscMalloc1(npl/bs,&bidxs);CHKERRQ(ierr); 2591 for (b=0;b<bs;b++) { 2592 PetscInt i; 2593 2594 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2595 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);CHKERRQ(ierr); 2596 bsp++; 2597 } 2598 ierr = PetscFree(bidxs);CHKERRQ(ierr); 2599 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2600 } 2601 ierr = ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);CHKERRQ(ierr); 2602 2603 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2604 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2605 if (iP) { 2606 IS newpressures; 2607 2608 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2609 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2610 pressures = newpressures; 2611 } 2612 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2613 if (!sorted) { 2614 ierr = ISSort(pressures);CHKERRQ(ierr); 2615 } 2616 ierr = PetscFree(pp);CHKERRQ(ierr); 2617 } 2618 2619 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2620 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2621 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2622 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2623 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2624 if (!sorted) { 2625 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2626 } 2627 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2628 zerodiag_save = zerodiag; 2629 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2630 if (!nz) { 2631 if (n) have_null = PETSC_FALSE; 2632 has_null_pressures = PETSC_FALSE; 2633 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2634 } 2635 recompute_zerodiag = PETSC_FALSE; 2636 2637 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2638 zerodiag_subs = NULL; 2639 benign_n = 0; 2640 n_interior_dofs = 0; 2641 interior_dofs = NULL; 2642 nneu = 0; 2643 if (pcbddc->NeumannBoundariesLocal) { 2644 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2645 } 2646 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2647 if (checkb) { /* need to compute interior nodes */ 2648 PetscInt n,i,j; 2649 PetscInt n_neigh,*neigh,*n_shared,**shared; 2650 PetscInt *iwork; 2651 2652 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2653 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2654 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2655 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2656 for (i=1;i<n_neigh;i++) 2657 for (j=0;j<n_shared[i];j++) 2658 iwork[shared[i][j]] += 1; 2659 for (i=0;i<n;i++) 2660 if (!iwork[i]) 2661 interior_dofs[n_interior_dofs++] = i; 2662 ierr = PetscFree(iwork);CHKERRQ(ierr); 2663 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2664 } 2665 if (has_null_pressures) { 2666 IS *subs; 2667 PetscInt nsubs,i,j,nl; 2668 const PetscInt *idxs; 2669 PetscScalar *array; 2670 Vec *work; 2671 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2672 2673 subs = pcbddc->local_subs; 2674 nsubs = pcbddc->n_local_subs; 2675 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2676 if (checkb) { 2677 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2678 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2679 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2680 /* work[0] = 1_p */ 2681 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2682 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2683 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2684 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2685 /* work[0] = 1_v */ 2686 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2687 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2688 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2689 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2690 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2691 } 2692 2693 if (nsubs > 1 || bsp > 1) { 2694 IS *is; 2695 PetscInt b,totb; 2696 2697 totb = bsp; 2698 is = bsp > 1 ? bzerodiag : &zerodiag; 2699 nsubs = PetscMax(nsubs,1); 2700 ierr = PetscCalloc1(nsubs*totb,&zerodiag_subs);CHKERRQ(ierr); 2701 for (b=0;b<totb;b++) { 2702 for (i=0;i<nsubs;i++) { 2703 ISLocalToGlobalMapping l2g; 2704 IS t_zerodiag_subs; 2705 PetscInt nl; 2706 2707 if (subs) { 2708 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2709 } else { 2710 IS tis; 2711 2712 ierr = MatGetLocalSize(pcbddc->local_mat,&nl,NULL);CHKERRQ(ierr); 2713 ierr = ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);CHKERRQ(ierr); 2714 ierr = ISLocalToGlobalMappingCreateIS(tis,&l2g);CHKERRQ(ierr); 2715 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2716 } 2717 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);CHKERRQ(ierr); 2718 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2719 if (nl) { 2720 PetscBool valid = PETSC_TRUE; 2721 2722 if (checkb) { 2723 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2724 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2725 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2726 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2727 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2728 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2729 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2730 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2731 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2732 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2733 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2734 for (j=0;j<n_interior_dofs;j++) { 2735 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2736 valid = PETSC_FALSE; 2737 break; 2738 } 2739 } 2740 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2741 } 2742 if (valid && nneu) { 2743 const PetscInt *idxs; 2744 PetscInt nzb; 2745 2746 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2747 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2748 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2749 if (nzb) valid = PETSC_FALSE; 2750 } 2751 if (valid && pressures) { 2752 IS t_pressure_subs,tmp; 2753 PetscInt i1,i2; 2754 2755 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2756 ierr = ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);CHKERRQ(ierr); 2757 ierr = ISGetLocalSize(tmp,&i1);CHKERRQ(ierr); 2758 ierr = ISGetLocalSize(t_zerodiag_subs,&i2);CHKERRQ(ierr); 2759 if (i2 != i1) valid = PETSC_FALSE; 2760 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2761 ierr = ISDestroy(&tmp);CHKERRQ(ierr); 2762 } 2763 if (valid) { 2764 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);CHKERRQ(ierr); 2765 benign_n++; 2766 } else recompute_zerodiag = PETSC_TRUE; 2767 } 2768 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2769 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2770 } 2771 } 2772 } else { /* there's just one subdomain (or zero if they have not been detected */ 2773 PetscBool valid = PETSC_TRUE; 2774 2775 if (nneu) valid = PETSC_FALSE; 2776 if (valid && pressures) { 2777 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2778 } 2779 if (valid && checkb) { 2780 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2781 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2782 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2783 for (j=0;j<n_interior_dofs;j++) { 2784 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2785 valid = PETSC_FALSE; 2786 break; 2787 } 2788 } 2789 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2790 } 2791 if (valid) { 2792 benign_n = 1; 2793 ierr = PetscMalloc1(benign_n,&zerodiag_subs);CHKERRQ(ierr); 2794 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2795 zerodiag_subs[0] = zerodiag; 2796 } 2797 } 2798 if (checkb) { 2799 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2800 } 2801 } 2802 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2803 2804 if (!benign_n) { 2805 PetscInt n; 2806 2807 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2808 recompute_zerodiag = PETSC_FALSE; 2809 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2810 if (n) have_null = PETSC_FALSE; 2811 } 2812 2813 /* final check for null pressures */ 2814 if (zerodiag && pressures) { 2815 ierr = ISEqual(pressures,zerodiag,&have_null);CHKERRQ(ierr); 2816 } 2817 2818 if (recompute_zerodiag) { 2819 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2820 if (benign_n == 1) { 2821 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2822 zerodiag = zerodiag_subs[0]; 2823 } else { 2824 PetscInt i,nzn,*new_idxs; 2825 2826 nzn = 0; 2827 for (i=0;i<benign_n;i++) { 2828 PetscInt ns; 2829 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2830 nzn += ns; 2831 } 2832 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2833 nzn = 0; 2834 for (i=0;i<benign_n;i++) { 2835 PetscInt ns,*idxs; 2836 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2837 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2838 ierr = PetscArraycpy(new_idxs+nzn,idxs,ns);CHKERRQ(ierr); 2839 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2840 nzn += ns; 2841 } 2842 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2843 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2844 } 2845 have_null = PETSC_FALSE; 2846 } 2847 2848 /* determines if the coarse solver will be singular or not */ 2849 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 2850 2851 /* Prepare matrix to compute no-net-flux */ 2852 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2853 Mat A,loc_divudotp; 2854 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2855 IS row,col,isused = NULL; 2856 PetscInt M,N,n,st,n_isused; 2857 2858 if (pressures) { 2859 isused = pressures; 2860 } else { 2861 isused = zerodiag_save; 2862 } 2863 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2864 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2865 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2866 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2867 n_isused = 0; 2868 if (isused) { 2869 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2870 } 2871 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 2872 st = st-n_isused; 2873 if (n) { 2874 const PetscInt *gidxs; 2875 2876 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2877 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2878 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2879 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2880 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2881 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2882 } else { 2883 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2884 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2885 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2886 } 2887 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2888 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2889 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2890 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2891 ierr = ISDestroy(&row);CHKERRQ(ierr); 2892 ierr = ISDestroy(&col);CHKERRQ(ierr); 2893 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2894 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2895 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2896 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2897 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2898 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2899 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2900 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2901 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2902 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2903 } 2904 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2905 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2906 if (bzerodiag) { 2907 PetscInt i; 2908 2909 for (i=0;i<bsp;i++) { 2910 ierr = ISDestroy(&bzerodiag[i]);CHKERRQ(ierr); 2911 } 2912 ierr = PetscFree(bzerodiag);CHKERRQ(ierr); 2913 } 2914 pcbddc->benign_n = benign_n; 2915 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2916 2917 /* determines if the problem has subdomains with 0 pressure block */ 2918 have_null = (PetscBool)(!!pcbddc->benign_n); 2919 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 2920 2921 project_b0: 2922 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2923 /* change of basis and p0 dofs */ 2924 if (pcbddc->benign_n) { 2925 PetscInt i,s,*nnz; 2926 2927 /* local change of basis for pressures */ 2928 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2929 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2930 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2931 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2932 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2933 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2934 for (i=0;i<pcbddc->benign_n;i++) { 2935 const PetscInt *idxs; 2936 PetscInt nzs,j; 2937 2938 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2939 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2940 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2941 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2942 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2943 } 2944 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2945 ierr = MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2946 ierr = PetscFree(nnz);CHKERRQ(ierr); 2947 /* set identity by default */ 2948 for (i=0;i<n;i++) { 2949 ierr = MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);CHKERRQ(ierr); 2950 } 2951 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2952 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2953 /* set change on pressures */ 2954 for (s=0;s<pcbddc->benign_n;s++) { 2955 PetscScalar *array; 2956 const PetscInt *idxs; 2957 PetscInt nzs; 2958 2959 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2960 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2961 for (i=0;i<nzs-1;i++) { 2962 PetscScalar vals[2]; 2963 PetscInt cols[2]; 2964 2965 cols[0] = idxs[i]; 2966 cols[1] = idxs[nzs-1]; 2967 vals[0] = 1.; 2968 vals[1] = 1.; 2969 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2970 } 2971 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2972 for (i=0;i<nzs-1;i++) array[i] = -1.; 2973 array[nzs-1] = 1.; 2974 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2975 /* store local idxs for p0 */ 2976 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2977 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2978 ierr = PetscFree(array);CHKERRQ(ierr); 2979 } 2980 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2981 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2982 2983 /* project if needed */ 2984 if (pcbddc->benign_change_explicit) { 2985 Mat M; 2986 2987 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2988 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2989 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2990 ierr = MatDestroy(&M);CHKERRQ(ierr); 2991 } 2992 /* store global idxs for p0 */ 2993 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2994 } 2995 *zerodiaglocal = zerodiag; 2996 PetscFunctionReturn(0); 2997 } 2998 2999 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3000 { 3001 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3002 PetscScalar *array; 3003 PetscErrorCode ierr; 3004 3005 PetscFunctionBegin; 3006 if (!pcbddc->benign_sf) { 3007 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 3008 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 3009 } 3010 if (get) { 3011 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3012 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);CHKERRQ(ierr); 3013 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);CHKERRQ(ierr); 3014 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3015 } else { 3016 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 3017 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);CHKERRQ(ierr); 3018 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);CHKERRQ(ierr); 3019 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 3020 } 3021 PetscFunctionReturn(0); 3022 } 3023 3024 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3025 { 3026 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3027 PetscErrorCode ierr; 3028 3029 PetscFunctionBegin; 3030 /* TODO: add error checking 3031 - avoid nested pop (or push) calls. 3032 - cannot push before pop. 3033 - cannot call this if pcbddc->local_mat is NULL 3034 */ 3035 if (!pcbddc->benign_n) { 3036 PetscFunctionReturn(0); 3037 } 3038 if (pop) { 3039 if (pcbddc->benign_change_explicit) { 3040 IS is_p0; 3041 MatReuse reuse; 3042 3043 /* extract B_0 */ 3044 reuse = MAT_INITIAL_MATRIX; 3045 if (pcbddc->benign_B0) { 3046 reuse = MAT_REUSE_MATRIX; 3047 } 3048 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 3049 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 3050 /* remove rows and cols from local problem */ 3051 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 3052 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3053 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 3054 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3055 } else { 3056 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3057 PetscScalar *vals; 3058 PetscInt i,n,*idxs_ins; 3059 3060 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 3061 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 3062 if (!pcbddc->benign_B0) { 3063 PetscInt *nnz; 3064 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 3065 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 3066 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3067 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 3068 for (i=0;i<pcbddc->benign_n;i++) { 3069 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 3070 nnz[i] = n - nnz[i]; 3071 } 3072 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 3073 ierr = MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3074 ierr = PetscFree(nnz);CHKERRQ(ierr); 3075 } 3076 3077 for (i=0;i<pcbddc->benign_n;i++) { 3078 PetscScalar *array; 3079 PetscInt *idxs,j,nz,cum; 3080 3081 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 3082 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3083 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3084 for (j=0;j<nz;j++) vals[j] = 1.; 3085 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3086 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 3087 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 3088 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 3089 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 3090 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3091 cum = 0; 3092 for (j=0;j<n;j++) { 3093 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3094 vals[cum] = array[j]; 3095 idxs_ins[cum] = j; 3096 cum++; 3097 } 3098 } 3099 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3100 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3101 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3102 } 3103 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3104 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3105 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3106 } 3107 } else { /* push */ 3108 if (pcbddc->benign_change_explicit) { 3109 PetscInt i; 3110 3111 for (i=0;i<pcbddc->benign_n;i++) { 3112 PetscScalar *B0_vals; 3113 PetscInt *B0_cols,B0_ncol; 3114 3115 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3116 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3117 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3118 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3119 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3120 } 3121 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3122 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3123 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3124 } 3125 PetscFunctionReturn(0); 3126 } 3127 3128 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3129 { 3130 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3131 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3132 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3133 PetscBLASInt *B_iwork,*B_ifail; 3134 PetscScalar *work,lwork; 3135 PetscScalar *St,*S,*eigv; 3136 PetscScalar *Sarray,*Starray; 3137 PetscReal *eigs,thresh,lthresh,uthresh; 3138 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3139 PetscBool allocated_S_St; 3140 #if defined(PETSC_USE_COMPLEX) 3141 PetscReal *rwork; 3142 #endif 3143 PetscErrorCode ierr; 3144 3145 PetscFunctionBegin; 3146 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3147 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3148 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3149 ierr = PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3150 3151 if (pcbddc->dbg_flag) { 3152 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3155 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3156 } 3157 3158 if (pcbddc->dbg_flag) { 3159 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);CHKERRQ(ierr); 3160 } 3161 3162 /* max size of subsets */ 3163 mss = 0; 3164 for (i=0;i<sub_schurs->n_subs;i++) { 3165 PetscInt subset_size; 3166 3167 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3168 mss = PetscMax(mss,subset_size); 3169 } 3170 3171 /* min/max and threshold */ 3172 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3173 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3174 nmax = PetscMax(nmin,nmax); 3175 allocated_S_St = PETSC_FALSE; 3176 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3177 allocated_S_St = PETSC_TRUE; 3178 } 3179 3180 /* allocate lapack workspace */ 3181 cum = cum2 = 0; 3182 maxneigs = 0; 3183 for (i=0;i<sub_schurs->n_subs;i++) { 3184 PetscInt n,subset_size; 3185 3186 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3187 n = PetscMin(subset_size,nmax); 3188 cum += subset_size; 3189 cum2 += subset_size*n; 3190 maxneigs = PetscMax(maxneigs,n); 3191 } 3192 lwork = 0; 3193 if (mss) { 3194 if (sub_schurs->is_symmetric) { 3195 PetscScalar sdummy = 0.; 3196 PetscBLASInt B_itype = 1; 3197 PetscBLASInt B_N = mss, idummy = 0; 3198 PetscReal rdummy = 0.,zero = 0.0; 3199 PetscReal eps = 0.0; /* dlamch? */ 3200 3201 B_lwork = -1; 3202 /* some implementations may complain about NULL pointers, even if we are querying */ 3203 S = &sdummy; 3204 St = &sdummy; 3205 eigs = &rdummy; 3206 eigv = &sdummy; 3207 B_iwork = &idummy; 3208 B_ifail = &idummy; 3209 #if defined(PETSC_USE_COMPLEX) 3210 rwork = &rdummy; 3211 #endif 3212 thresh = 1.0; 3213 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3214 #if defined(PETSC_USE_COMPLEX) 3215 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3216 #else 3217 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3218 #endif 3219 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3220 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3221 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3222 } 3223 3224 nv = 0; 3225 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3226 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3227 } 3228 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3229 if (allocated_S_St) { 3230 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3231 } 3232 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3233 #if defined(PETSC_USE_COMPLEX) 3234 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3235 #endif 3236 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3237 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3238 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3239 nv+cum,&pcbddc->adaptive_constraints_idxs, 3240 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3241 ierr = PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);CHKERRQ(ierr); 3242 3243 maxneigs = 0; 3244 cum = cumarray = 0; 3245 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3246 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3247 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3248 const PetscInt *idxs; 3249 3250 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3251 for (cum=0;cum<nv;cum++) { 3252 pcbddc->adaptive_constraints_n[cum] = 1; 3253 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3254 pcbddc->adaptive_constraints_data[cum] = 1.0; 3255 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3256 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3257 } 3258 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3259 } 3260 3261 if (mss) { /* multilevel */ 3262 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3263 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3264 } 3265 3266 lthresh = pcbddc->adaptive_threshold[0]; 3267 uthresh = pcbddc->adaptive_threshold[1]; 3268 for (i=0;i<sub_schurs->n_subs;i++) { 3269 const PetscInt *idxs; 3270 PetscReal upper,lower; 3271 PetscInt j,subset_size,eigs_start = 0; 3272 PetscBLASInt B_N; 3273 PetscBool same_data = PETSC_FALSE; 3274 PetscBool scal = PETSC_FALSE; 3275 3276 if (pcbddc->use_deluxe_scaling) { 3277 upper = PETSC_MAX_REAL; 3278 lower = uthresh; 3279 } else { 3280 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3281 upper = 1./uthresh; 3282 lower = 0.; 3283 } 3284 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3285 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3286 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3287 /* this is experimental: we assume the dofs have been properly grouped to have 3288 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3289 if (!sub_schurs->is_posdef) { 3290 Mat T; 3291 3292 for (j=0;j<subset_size;j++) { 3293 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3294 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3295 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3296 ierr = MatDestroy(&T);CHKERRQ(ierr); 3297 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3298 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3299 ierr = MatDestroy(&T);CHKERRQ(ierr); 3300 if (sub_schurs->change_primal_sub) { 3301 PetscInt nz,k; 3302 const PetscInt *idxs; 3303 3304 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3305 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3306 for (k=0;k<nz;k++) { 3307 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3308 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3309 } 3310 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3311 } 3312 scal = PETSC_TRUE; 3313 break; 3314 } 3315 } 3316 } 3317 3318 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3319 if (sub_schurs->is_symmetric) { 3320 PetscInt j,k; 3321 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3322 ierr = PetscArrayzero(S,subset_size*subset_size);CHKERRQ(ierr); 3323 ierr = PetscArrayzero(St,subset_size*subset_size);CHKERRQ(ierr); 3324 } 3325 for (j=0;j<subset_size;j++) { 3326 for (k=j;k<subset_size;k++) { 3327 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3328 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3329 } 3330 } 3331 } else { 3332 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3333 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3334 } 3335 } else { 3336 S = Sarray + cumarray; 3337 St = Starray + cumarray; 3338 } 3339 /* see if we can save some work */ 3340 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3341 ierr = PetscArraycmp(S,St,subset_size*subset_size,&same_data);CHKERRQ(ierr); 3342 } 3343 3344 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3345 B_neigs = 0; 3346 } else { 3347 if (sub_schurs->is_symmetric) { 3348 PetscBLASInt B_itype = 1; 3349 PetscBLASInt B_IL, B_IU; 3350 PetscReal eps = -1.0; /* dlamch? */ 3351 PetscInt nmin_s; 3352 PetscBool compute_range; 3353 3354 B_neigs = 0; 3355 compute_range = (PetscBool)!same_data; 3356 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3357 3358 if (pcbddc->dbg_flag) { 3359 PetscInt nc = 0; 3360 3361 if (sub_schurs->change_primal_sub) { 3362 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3363 } 3364 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3365 } 3366 3367 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3368 if (compute_range) { 3369 3370 /* ask for eigenvalues larger than thresh */ 3371 if (sub_schurs->is_posdef) { 3372 #if defined(PETSC_USE_COMPLEX) 3373 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3374 #else 3375 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3376 #endif 3377 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3378 } else { /* no theory so far, but it works nicely */ 3379 PetscInt recipe = 0,recipe_m = 1; 3380 PetscReal bb[2]; 3381 3382 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3383 switch (recipe) { 3384 case 0: 3385 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3386 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3387 #if defined(PETSC_USE_COMPLEX) 3388 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3389 #else 3390 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3391 #endif 3392 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3393 break; 3394 case 1: 3395 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3396 #if defined(PETSC_USE_COMPLEX) 3397 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3398 #else 3399 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3400 #endif 3401 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3402 if (!scal) { 3403 PetscBLASInt B_neigs2 = 0; 3404 3405 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3406 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3407 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3408 #if defined(PETSC_USE_COMPLEX) 3409 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3410 #else 3411 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3412 #endif 3413 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3414 B_neigs += B_neigs2; 3415 } 3416 break; 3417 case 2: 3418 if (scal) { 3419 bb[0] = PETSC_MIN_REAL; 3420 bb[1] = 0; 3421 #if defined(PETSC_USE_COMPLEX) 3422 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3423 #else 3424 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3425 #endif 3426 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3427 } else { 3428 PetscBLASInt B_neigs2 = 0; 3429 PetscBool import = PETSC_FALSE; 3430 3431 lthresh = PetscMax(lthresh,0.0); 3432 if (lthresh > 0.0) { 3433 bb[0] = PETSC_MIN_REAL; 3434 bb[1] = lthresh*lthresh; 3435 3436 import = PETSC_TRUE; 3437 #if defined(PETSC_USE_COMPLEX) 3438 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3439 #else 3440 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3441 #endif 3442 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3443 } 3444 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3445 bb[1] = PETSC_MAX_REAL; 3446 if (import) { 3447 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3448 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3449 } 3450 #if defined(PETSC_USE_COMPLEX) 3451 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3452 #else 3453 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3454 #endif 3455 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3456 B_neigs += B_neigs2; 3457 } 3458 break; 3459 case 3: 3460 if (scal) { 3461 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3462 } else { 3463 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3464 } 3465 if (!scal) { 3466 bb[0] = uthresh; 3467 bb[1] = PETSC_MAX_REAL; 3468 #if defined(PETSC_USE_COMPLEX) 3469 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3470 #else 3471 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3472 #endif 3473 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3474 } 3475 if (recipe_m > 0 && B_N - B_neigs > 0) { 3476 PetscBLASInt B_neigs2 = 0; 3477 3478 B_IL = 1; 3479 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3480 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3481 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3482 #if defined(PETSC_USE_COMPLEX) 3483 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3484 #else 3485 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3486 #endif 3487 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3488 B_neigs += B_neigs2; 3489 } 3490 break; 3491 case 4: 3492 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3493 #if defined(PETSC_USE_COMPLEX) 3494 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3495 #else 3496 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3497 #endif 3498 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3499 { 3500 PetscBLASInt B_neigs2 = 0; 3501 3502 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3503 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3504 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3505 #if defined(PETSC_USE_COMPLEX) 3506 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3507 #else 3508 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3509 #endif 3510 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3511 B_neigs += B_neigs2; 3512 } 3513 break; 3514 case 5: /* same as before: first compute all eigenvalues, then filter */ 3515 #if defined(PETSC_USE_COMPLEX) 3516 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3517 #else 3518 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3519 #endif 3520 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3521 { 3522 PetscInt e,k,ne; 3523 for (e=0,ne=0;e<B_neigs;e++) { 3524 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3525 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3526 eigs[ne] = eigs[e]; 3527 ne++; 3528 } 3529 } 3530 ierr = PetscArraycpy(eigv,S,B_N*ne);CHKERRQ(ierr); 3531 B_neigs = ne; 3532 } 3533 break; 3534 default: 3535 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3536 } 3537 } 3538 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3539 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3540 B_IL = 1; 3541 #if defined(PETSC_USE_COMPLEX) 3542 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3543 #else 3544 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3545 #endif 3546 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3547 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3548 PetscInt k; 3549 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3550 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3551 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3552 nmin = nmax; 3553 ierr = PetscArrayzero(eigv,subset_size*nmax);CHKERRQ(ierr); 3554 for (k=0;k<nmax;k++) { 3555 eigs[k] = 1./PETSC_SMALL; 3556 eigv[k*(subset_size+1)] = 1.0; 3557 } 3558 } 3559 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3560 if (B_ierr) { 3561 if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3562 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3563 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3564 } 3565 3566 if (B_neigs > nmax) { 3567 if (pcbddc->dbg_flag) { 3568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);CHKERRQ(ierr); 3569 } 3570 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3571 B_neigs = nmax; 3572 } 3573 3574 nmin_s = PetscMin(nmin,B_N); 3575 if (B_neigs < nmin_s) { 3576 PetscBLASInt B_neigs2 = 0; 3577 3578 if (pcbddc->use_deluxe_scaling) { 3579 if (scal) { 3580 B_IU = nmin_s; 3581 B_IL = B_neigs + 1; 3582 } else { 3583 B_IL = B_N - nmin_s + 1; 3584 B_IU = B_N - B_neigs; 3585 } 3586 } else { 3587 B_IL = B_neigs + 1; 3588 B_IU = nmin_s; 3589 } 3590 if (pcbddc->dbg_flag) { 3591 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);CHKERRQ(ierr); 3592 } 3593 if (sub_schurs->is_symmetric) { 3594 PetscInt j,k; 3595 for (j=0;j<subset_size;j++) { 3596 for (k=j;k<subset_size;k++) { 3597 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3598 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3599 } 3600 } 3601 } else { 3602 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3603 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3604 } 3605 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3606 #if defined(PETSC_USE_COMPLEX) 3607 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3608 #else 3609 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3610 #endif 3611 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3612 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3613 B_neigs += B_neigs2; 3614 } 3615 if (B_ierr) { 3616 if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3617 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3618 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3619 } 3620 if (pcbddc->dbg_flag) { 3621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3622 for (j=0;j<B_neigs;j++) { 3623 if (eigs[j] == 0.0) { 3624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3625 } else { 3626 if (pcbddc->use_deluxe_scaling) { 3627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3628 } else { 3629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3630 } 3631 } 3632 } 3633 } 3634 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3635 } 3636 /* change the basis back to the original one */ 3637 if (sub_schurs->change) { 3638 Mat change,phi,phit; 3639 3640 if (pcbddc->dbg_flag > 2) { 3641 PetscInt ii; 3642 for (ii=0;ii<B_neigs;ii++) { 3643 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3644 for (j=0;j<B_N;j++) { 3645 #if defined(PETSC_USE_COMPLEX) 3646 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3647 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3649 #else 3650 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3651 #endif 3652 } 3653 } 3654 } 3655 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3656 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3657 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3658 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3659 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3660 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3661 } 3662 maxneigs = PetscMax(B_neigs,maxneigs); 3663 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3664 if (B_neigs) { 3665 ierr = PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);CHKERRQ(ierr); 3666 3667 if (pcbddc->dbg_flag > 1) { 3668 PetscInt ii; 3669 for (ii=0;ii<B_neigs;ii++) { 3670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3671 for (j=0;j<B_N;j++) { 3672 #if defined(PETSC_USE_COMPLEX) 3673 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3674 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3675 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3676 #else 3677 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3678 #endif 3679 } 3680 } 3681 } 3682 ierr = PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);CHKERRQ(ierr); 3683 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3684 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3685 cum++; 3686 } 3687 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3688 /* shift for next computation */ 3689 cumarray += subset_size*subset_size; 3690 } 3691 if (pcbddc->dbg_flag) { 3692 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3693 } 3694 3695 if (mss) { 3696 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3697 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3698 /* destroy matrices (junk) */ 3699 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3700 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3701 } 3702 if (allocated_S_St) { 3703 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3704 } 3705 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3706 #if defined(PETSC_USE_COMPLEX) 3707 ierr = PetscFree(rwork);CHKERRQ(ierr); 3708 #endif 3709 if (pcbddc->dbg_flag) { 3710 PetscInt maxneigs_r; 3711 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 3712 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);CHKERRQ(ierr); 3713 } 3714 ierr = PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3715 PetscFunctionReturn(0); 3716 } 3717 3718 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3719 { 3720 PetscScalar *coarse_submat_vals; 3721 PetscErrorCode ierr; 3722 3723 PetscFunctionBegin; 3724 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3725 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3726 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3727 3728 /* Setup local neumann solver ksp_R */ 3729 /* PCBDDCSetUpLocalScatters should be called first! */ 3730 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3731 3732 /* 3733 Setup local correction and local part of coarse basis. 3734 Gives back the dense local part of the coarse matrix in column major ordering 3735 */ 3736 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3737 3738 /* Compute total number of coarse nodes and setup coarse solver */ 3739 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3740 3741 /* free */ 3742 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3743 PetscFunctionReturn(0); 3744 } 3745 3746 PetscErrorCode PCBDDCResetCustomization(PC pc) 3747 { 3748 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3749 PetscErrorCode ierr; 3750 3751 PetscFunctionBegin; 3752 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3753 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3754 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3755 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3756 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3757 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3758 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3759 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3760 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3761 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3762 PetscFunctionReturn(0); 3763 } 3764 3765 PetscErrorCode PCBDDCResetTopography(PC pc) 3766 { 3767 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3768 PetscInt i; 3769 PetscErrorCode ierr; 3770 3771 PetscFunctionBegin; 3772 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3773 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3774 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3775 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3776 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3777 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3778 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3779 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3780 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3781 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3782 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3783 for (i=0;i<pcbddc->n_local_subs;i++) { 3784 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3785 } 3786 pcbddc->n_local_subs = 0; 3787 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3788 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3789 pcbddc->graphanalyzed = PETSC_FALSE; 3790 pcbddc->recompute_topography = PETSC_TRUE; 3791 pcbddc->corner_selected = PETSC_FALSE; 3792 PetscFunctionReturn(0); 3793 } 3794 3795 PetscErrorCode PCBDDCResetSolvers(PC pc) 3796 { 3797 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3798 PetscErrorCode ierr; 3799 3800 PetscFunctionBegin; 3801 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3802 if (pcbddc->coarse_phi_B) { 3803 PetscScalar *array; 3804 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3805 ierr = PetscFree(array);CHKERRQ(ierr); 3806 } 3807 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3808 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3809 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3810 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3811 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3812 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3813 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3814 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3815 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3816 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3817 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3818 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3819 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3820 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3821 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3822 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3823 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3824 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3825 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3826 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3827 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3828 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3829 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3830 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3831 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3832 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3833 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3834 if (pcbddc->benign_zerodiag_subs) { 3835 PetscInt i; 3836 for (i=0;i<pcbddc->benign_n;i++) { 3837 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3838 } 3839 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3840 } 3841 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3842 PetscFunctionReturn(0); 3843 } 3844 3845 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3846 { 3847 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3848 PC_IS *pcis = (PC_IS*)pc->data; 3849 VecType impVecType; 3850 PetscInt n_constraints,n_R,old_size; 3851 PetscErrorCode ierr; 3852 3853 PetscFunctionBegin; 3854 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3855 n_R = pcis->n - pcbddc->n_vertices; 3856 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3857 /* local work vectors (try to avoid unneeded work)*/ 3858 /* R nodes */ 3859 old_size = -1; 3860 if (pcbddc->vec1_R) { 3861 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3862 } 3863 if (n_R != old_size) { 3864 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3865 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3866 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3867 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3868 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3869 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3870 } 3871 /* local primal dofs */ 3872 old_size = -1; 3873 if (pcbddc->vec1_P) { 3874 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3875 } 3876 if (pcbddc->local_primal_size != old_size) { 3877 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3878 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3879 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3880 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3881 } 3882 /* local explicit constraints */ 3883 old_size = -1; 3884 if (pcbddc->vec1_C) { 3885 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3886 } 3887 if (n_constraints && n_constraints != old_size) { 3888 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3889 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3890 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3891 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3892 } 3893 PetscFunctionReturn(0); 3894 } 3895 3896 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3897 { 3898 PetscErrorCode ierr; 3899 /* pointers to pcis and pcbddc */ 3900 PC_IS* pcis = (PC_IS*)pc->data; 3901 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3902 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3903 /* submatrices of local problem */ 3904 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3905 /* submatrices of local coarse problem */ 3906 Mat S_VV,S_CV,S_VC,S_CC; 3907 /* working matrices */ 3908 Mat C_CR; 3909 /* additional working stuff */ 3910 PC pc_R; 3911 Mat F,Brhs = NULL; 3912 Vec dummy_vec; 3913 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3914 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3915 PetscScalar *work; 3916 PetscInt *idx_V_B; 3917 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3918 PetscInt i,n_R,n_D,n_B; 3919 PetscScalar one=1.0,m_one=-1.0; 3920 3921 PetscFunctionBegin; 3922 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3923 ierr = PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3924 3925 /* Set Non-overlapping dimensions */ 3926 n_vertices = pcbddc->n_vertices; 3927 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3928 n_B = pcis->n_B; 3929 n_D = pcis->n - n_B; 3930 n_R = pcis->n - n_vertices; 3931 3932 /* vertices in boundary numbering */ 3933 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3934 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3935 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3936 3937 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3938 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3939 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3940 ierr = MatDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3942 ierr = MatDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3943 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3944 ierr = MatDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3945 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3946 ierr = MatDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3947 3948 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3949 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3950 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3951 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3952 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3953 lda_rhs = n_R; 3954 need_benign_correction = PETSC_FALSE; 3955 if (isLU || isCHOL) { 3956 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3957 } else if (sub_schurs && sub_schurs->reuse_solver) { 3958 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3959 MatFactorType type; 3960 3961 F = reuse_solver->F; 3962 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3963 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3964 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3965 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3966 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3967 } else F = NULL; 3968 3969 /* determine if we can use a sparse right-hand side */ 3970 sparserhs = PETSC_FALSE; 3971 if (F) { 3972 MatSolverType solver; 3973 3974 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3975 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3976 } 3977 3978 /* allocate workspace */ 3979 n = 0; 3980 if (n_constraints) { 3981 n += lda_rhs*n_constraints; 3982 } 3983 if (n_vertices) { 3984 n = PetscMax(2*lda_rhs*n_vertices,n); 3985 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3986 } 3987 if (!pcbddc->symmetric_primal) { 3988 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3989 } 3990 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3991 3992 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3993 dummy_vec = NULL; 3994 if (need_benign_correction && lda_rhs != n_R && F) { 3995 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);CHKERRQ(ierr); 3996 ierr = VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);CHKERRQ(ierr); 3997 ierr = VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);CHKERRQ(ierr); 3998 } 3999 4000 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 4001 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 4002 4003 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4004 if (n_constraints) { 4005 Mat M3,C_B; 4006 IS is_aux; 4007 PetscScalar *array,*array2; 4008 4009 /* Extract constraints on R nodes: C_{CR} */ 4010 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 4011 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 4012 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4013 4014 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4015 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4016 if (!sparserhs) { 4017 ierr = PetscArrayzero(work,lda_rhs*n_constraints);CHKERRQ(ierr); 4018 for (i=0;i<n_constraints;i++) { 4019 const PetscScalar *row_cmat_values; 4020 const PetscInt *row_cmat_indices; 4021 PetscInt size_of_constraint,j; 4022 4023 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4024 for (j=0;j<size_of_constraint;j++) { 4025 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4026 } 4027 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4028 } 4029 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 4030 } else { 4031 Mat tC_CR; 4032 4033 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4034 if (lda_rhs != n_R) { 4035 PetscScalar *aa; 4036 PetscInt r,*ii,*jj; 4037 PetscBool done; 4038 4039 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4040 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4041 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 4042 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 4043 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4044 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4045 } else { 4046 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 4047 tC_CR = C_CR; 4048 } 4049 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 4050 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 4051 } 4052 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 4053 if (F) { 4054 if (need_benign_correction) { 4055 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4056 4057 /* rhs is already zero on interior dofs, no need to change the rhs */ 4058 ierr = PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);CHKERRQ(ierr); 4059 } 4060 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 4061 if (need_benign_correction) { 4062 PetscScalar *marr; 4063 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4064 4065 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4066 if (lda_rhs != n_R) { 4067 for (i=0;i<n_constraints;i++) { 4068 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4069 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4070 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4071 } 4072 } else { 4073 for (i=0;i<n_constraints;i++) { 4074 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4075 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4076 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4077 } 4078 } 4079 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4080 } 4081 } else { 4082 PetscScalar *marr; 4083 4084 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4085 for (i=0;i<n_constraints;i++) { 4086 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4087 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 4088 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4089 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4090 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4091 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4092 } 4093 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4094 } 4095 if (sparserhs) { 4096 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4097 } 4098 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4099 if (!pcbddc->switch_static) { 4100 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4101 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4102 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4103 for (i=0;i<n_constraints;i++) { 4104 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 4105 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 4106 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4107 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4108 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4109 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4110 } 4111 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4112 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4113 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4114 } else { 4115 if (lda_rhs != n_R) { 4116 IS dummy; 4117 4118 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 4119 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4120 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 4121 } else { 4122 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 4123 pcbddc->local_auxmat2 = local_auxmat2_R; 4124 } 4125 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4126 } 4127 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4128 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4129 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4130 if (isCHOL) { 4131 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4132 } else { 4133 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4134 } 4135 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4136 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4137 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4138 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4139 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4140 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4141 } 4142 4143 /* Get submatrices from subdomain matrix */ 4144 if (n_vertices) { 4145 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4146 PetscBool oldpin; 4147 #endif 4148 PetscBool isaij; 4149 IS is_aux; 4150 4151 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4152 IS tis; 4153 4154 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4155 ierr = ISSort(tis);CHKERRQ(ierr); 4156 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4157 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4158 } else { 4159 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4160 } 4161 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4162 oldpin = pcbddc->local_mat->boundtocpu; 4163 #endif 4164 ierr = MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);CHKERRQ(ierr); 4165 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4166 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4167 ierr = PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);CHKERRQ(ierr); 4168 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4169 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4170 } 4171 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4172 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4173 ierr = MatBindToCPU(pcbddc->local_mat,oldpin);CHKERRQ(ierr); 4174 #endif 4175 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4176 } 4177 4178 /* Matrix of coarse basis functions (local) */ 4179 if (pcbddc->coarse_phi_B) { 4180 PetscInt on_B,on_primal,on_D=n_D; 4181 if (pcbddc->coarse_phi_D) { 4182 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4183 } 4184 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4185 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4186 PetscScalar *marray; 4187 4188 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4189 ierr = PetscFree(marray);CHKERRQ(ierr); 4190 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4191 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4192 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4193 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4194 } 4195 } 4196 4197 if (!pcbddc->coarse_phi_B) { 4198 PetscScalar *marr; 4199 4200 /* memory size */ 4201 n = n_B*pcbddc->local_primal_size; 4202 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4203 if (!pcbddc->symmetric_primal) n *= 2; 4204 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4205 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4206 marr += n_B*pcbddc->local_primal_size; 4207 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4208 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4209 marr += n_D*pcbddc->local_primal_size; 4210 } 4211 if (!pcbddc->symmetric_primal) { 4212 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4213 marr += n_B*pcbddc->local_primal_size; 4214 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4215 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4216 } 4217 } else { 4218 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4219 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4220 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4221 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4222 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4223 } 4224 } 4225 } 4226 4227 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4228 p0_lidx_I = NULL; 4229 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4230 const PetscInt *idxs; 4231 4232 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4233 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4234 for (i=0;i<pcbddc->benign_n;i++) { 4235 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4236 } 4237 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4238 } 4239 4240 /* vertices */ 4241 if (n_vertices) { 4242 PetscBool restoreavr = PETSC_FALSE; 4243 4244 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4245 4246 if (n_R) { 4247 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4248 PetscBLASInt B_N,B_one = 1; 4249 const PetscScalar *x; 4250 PetscScalar *y; 4251 4252 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4253 if (need_benign_correction) { 4254 ISLocalToGlobalMapping RtoN; 4255 IS is_p0; 4256 PetscInt *idxs_p0,n; 4257 4258 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4259 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4260 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4261 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4262 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4263 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4264 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4265 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4266 } 4267 4268 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4269 if (!sparserhs || need_benign_correction) { 4270 if (lda_rhs == n_R) { 4271 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4272 } else { 4273 PetscScalar *av,*array; 4274 const PetscInt *xadj,*adjncy; 4275 PetscInt n; 4276 PetscBool flg_row; 4277 4278 array = work+lda_rhs*n_vertices; 4279 ierr = PetscArrayzero(array,lda_rhs*n_vertices);CHKERRQ(ierr); 4280 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4281 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4282 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4283 for (i=0;i<n;i++) { 4284 PetscInt j; 4285 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4286 } 4287 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4288 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4289 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4290 } 4291 if (need_benign_correction) { 4292 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4293 PetscScalar *marr; 4294 4295 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4296 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4297 4298 | 0 0 0 | (V) 4299 L = | 0 0 -1 | (P-p0) 4300 | 0 0 -1 | (p0) 4301 4302 */ 4303 for (i=0;i<reuse_solver->benign_n;i++) { 4304 const PetscScalar *vals; 4305 const PetscInt *idxs,*idxs_zero; 4306 PetscInt n,j,nz; 4307 4308 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4309 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4310 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4311 for (j=0;j<n;j++) { 4312 PetscScalar val = vals[j]; 4313 PetscInt k,col = idxs[j]; 4314 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4315 } 4316 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4317 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4318 } 4319 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4320 } 4321 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4322 Brhs = A_RV; 4323 } else { 4324 Mat tA_RVT,A_RVT; 4325 4326 if (!pcbddc->symmetric_primal) { 4327 /* A_RV already scaled by -1 */ 4328 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4329 } else { 4330 restoreavr = PETSC_TRUE; 4331 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4332 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4333 A_RVT = A_VR; 4334 } 4335 if (lda_rhs != n_R) { 4336 PetscScalar *aa; 4337 PetscInt r,*ii,*jj; 4338 PetscBool done; 4339 4340 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4341 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4342 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4343 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4344 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4345 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4346 } else { 4347 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4348 tA_RVT = A_RVT; 4349 } 4350 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4351 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4352 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4353 } 4354 if (F) { 4355 /* need to correct the rhs */ 4356 if (need_benign_correction) { 4357 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4358 PetscScalar *marr; 4359 4360 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4361 if (lda_rhs != n_R) { 4362 for (i=0;i<n_vertices;i++) { 4363 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4364 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4365 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4366 } 4367 } else { 4368 for (i=0;i<n_vertices;i++) { 4369 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4370 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4371 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4372 } 4373 } 4374 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4375 } 4376 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4377 if (restoreavr) { 4378 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4379 } 4380 /* need to correct the solution */ 4381 if (need_benign_correction) { 4382 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4383 PetscScalar *marr; 4384 4385 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4386 if (lda_rhs != n_R) { 4387 for (i=0;i<n_vertices;i++) { 4388 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4389 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4390 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4391 } 4392 } else { 4393 for (i=0;i<n_vertices;i++) { 4394 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4395 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4396 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4397 } 4398 } 4399 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4400 } 4401 } else { 4402 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4403 for (i=0;i<n_vertices;i++) { 4404 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4405 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4406 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4407 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4408 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4409 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4410 } 4411 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4412 } 4413 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4414 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4415 /* S_VV and S_CV */ 4416 if (n_constraints) { 4417 Mat B; 4418 4419 ierr = PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);CHKERRQ(ierr); 4420 for (i=0;i<n_vertices;i++) { 4421 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4422 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4423 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4424 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4425 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4426 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4427 } 4428 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4429 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4430 ierr = MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV);CHKERRQ(ierr); 4431 ierr = MatProductSetType(S_CV,MATPRODUCT_AB);CHKERRQ(ierr); 4432 ierr = MatProductSetFromOptions(S_CV);CHKERRQ(ierr); 4433 ierr = MatProductSymbolic(S_CV);CHKERRQ(ierr); 4434 ierr = MatProductNumeric(S_CV);CHKERRQ(ierr); 4435 ierr = MatProductClear(S_CV);CHKERRQ(ierr); 4436 4437 ierr = MatDestroy(&B);CHKERRQ(ierr); 4438 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4439 /* Reuse B = local_auxmat2_R * S_CV */ 4440 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B);CHKERRQ(ierr); 4441 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4442 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4443 ierr = MatProductSymbolic(B);CHKERRQ(ierr); 4444 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4445 4446 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4447 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4448 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4449 ierr = MatDestroy(&B);CHKERRQ(ierr); 4450 } 4451 if (lda_rhs != n_R) { 4452 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4453 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4454 ierr = MatDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4455 } 4456 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4457 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4458 if (need_benign_correction) { 4459 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4460 PetscScalar *marr,*sums; 4461 4462 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4463 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4464 for (i=0;i<reuse_solver->benign_n;i++) { 4465 const PetscScalar *vals; 4466 const PetscInt *idxs,*idxs_zero; 4467 PetscInt n,j,nz; 4468 4469 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4470 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4471 for (j=0;j<n_vertices;j++) { 4472 PetscInt k; 4473 sums[j] = 0.; 4474 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4475 } 4476 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4477 for (j=0;j<n;j++) { 4478 PetscScalar val = vals[j]; 4479 PetscInt k; 4480 for (k=0;k<n_vertices;k++) { 4481 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4482 } 4483 } 4484 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4485 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4486 } 4487 ierr = PetscFree(sums);CHKERRQ(ierr); 4488 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4489 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4490 } 4491 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4492 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4493 ierr = MatDenseGetArrayRead(A_VV,&x);CHKERRQ(ierr); 4494 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4495 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4496 ierr = MatDenseRestoreArrayRead(A_VV,&x);CHKERRQ(ierr); 4497 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4498 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4499 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4500 } else { 4501 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4502 } 4503 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4504 4505 /* coarse basis functions */ 4506 for (i=0;i<n_vertices;i++) { 4507 PetscScalar *y; 4508 4509 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4510 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4511 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4512 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4513 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4514 y[n_B*i+idx_V_B[i]] = 1.0; 4515 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4516 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4517 4518 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4519 PetscInt j; 4520 4521 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4522 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4523 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4524 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4525 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4526 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4527 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4528 } 4529 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4530 } 4531 /* if n_R == 0 the object is not destroyed */ 4532 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4533 } 4534 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4535 4536 if (n_constraints) { 4537 Mat B; 4538 4539 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4540 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4541 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B);CHKERRQ(ierr); 4542 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4543 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4544 ierr = MatProductSymbolic(B);CHKERRQ(ierr); 4545 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4546 4547 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4548 if (n_vertices) { 4549 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4550 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4551 } else { 4552 Mat S_VCt; 4553 4554 if (lda_rhs != n_R) { 4555 ierr = MatDestroy(&B);CHKERRQ(ierr); 4556 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4557 ierr = MatDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4558 } 4559 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4560 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4561 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4562 } 4563 } 4564 ierr = MatDestroy(&B);CHKERRQ(ierr); 4565 /* coarse basis functions */ 4566 for (i=0;i<n_constraints;i++) { 4567 PetscScalar *y; 4568 4569 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4570 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4571 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4572 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4573 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4574 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4575 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4576 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4577 PetscInt j; 4578 4579 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4580 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4581 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4582 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4583 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4584 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4585 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4586 } 4587 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4588 } 4589 } 4590 if (n_constraints) { 4591 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4592 } 4593 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4594 4595 /* coarse matrix entries relative to B_0 */ 4596 if (pcbddc->benign_n) { 4597 Mat B0_B,B0_BPHI; 4598 IS is_dummy; 4599 const PetscScalar *data; 4600 PetscInt j; 4601 4602 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4603 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4604 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4605 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4606 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4607 ierr = MatDenseGetArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4608 for (j=0;j<pcbddc->benign_n;j++) { 4609 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4610 for (i=0;i<pcbddc->local_primal_size;i++) { 4611 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4612 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4613 } 4614 } 4615 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4616 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4617 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4618 } 4619 4620 /* compute other basis functions for non-symmetric problems */ 4621 if (!pcbddc->symmetric_primal) { 4622 Mat B_V=NULL,B_C=NULL; 4623 PetscScalar *marray; 4624 4625 if (n_constraints) { 4626 Mat S_CCT,C_CRT; 4627 4628 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4629 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4630 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4631 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4632 if (n_vertices) { 4633 Mat S_VCT; 4634 4635 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4636 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4637 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4638 } 4639 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4640 } else { 4641 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4642 } 4643 if (n_vertices && n_R) { 4644 PetscScalar *av,*marray; 4645 const PetscInt *xadj,*adjncy; 4646 PetscInt n; 4647 PetscBool flg_row; 4648 4649 /* B_V = B_V - A_VR^T */ 4650 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4651 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4652 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4653 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4654 for (i=0;i<n;i++) { 4655 PetscInt j; 4656 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4657 } 4658 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4659 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4660 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4661 } 4662 4663 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4664 if (n_vertices) { 4665 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4666 for (i=0;i<n_vertices;i++) { 4667 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4668 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4669 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4670 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4671 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4672 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4673 } 4674 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4675 } 4676 if (B_C) { 4677 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4678 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4679 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4680 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4681 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4682 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4683 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4684 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4685 } 4686 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4687 } 4688 /* coarse basis functions */ 4689 for (i=0;i<pcbddc->local_primal_size;i++) { 4690 PetscScalar *y; 4691 4692 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4693 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4694 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4695 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4696 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4697 if (i<n_vertices) { 4698 y[n_B*i+idx_V_B[i]] = 1.0; 4699 } 4700 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4701 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4702 4703 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4704 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4705 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4706 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4707 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4708 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4709 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4710 } 4711 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4712 } 4713 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4714 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4715 } 4716 4717 /* free memory */ 4718 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4719 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4720 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4721 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4722 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4723 ierr = PetscFree(work);CHKERRQ(ierr); 4724 if (n_vertices) { 4725 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4726 } 4727 if (n_constraints) { 4728 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4729 } 4730 ierr = PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 4731 4732 /* Checking coarse_sub_mat and coarse basis functios */ 4733 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4734 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4735 if (pcbddc->dbg_flag) { 4736 Mat coarse_sub_mat; 4737 Mat AUXMAT,TM1,TM2,TM3,TM4; 4738 Mat coarse_phi_D,coarse_phi_B; 4739 Mat coarse_psi_D,coarse_psi_B; 4740 Mat A_II,A_BB,A_IB,A_BI; 4741 Mat C_B,CPHI; 4742 IS is_dummy; 4743 Vec mones; 4744 MatType checkmattype=MATSEQAIJ; 4745 PetscReal real_value; 4746 4747 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4748 Mat A; 4749 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4750 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4751 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4752 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4753 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4754 ierr = MatDestroy(&A);CHKERRQ(ierr); 4755 } else { 4756 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4757 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4758 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4759 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4760 } 4761 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4762 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4763 if (!pcbddc->symmetric_primal) { 4764 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4765 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4766 } 4767 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4768 4769 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4770 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4771 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4772 if (!pcbddc->symmetric_primal) { 4773 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4774 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4775 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4776 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4777 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4778 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4779 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4780 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4781 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4782 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4783 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4784 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4785 } else { 4786 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4787 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4788 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4789 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4790 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4791 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4792 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4793 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4794 } 4795 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4796 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4797 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4798 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4799 if (pcbddc->benign_n) { 4800 Mat B0_B,B0_BPHI; 4801 const PetscScalar *data2; 4802 PetscScalar *data; 4803 PetscInt j; 4804 4805 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4806 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4807 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4808 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4809 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4810 ierr = MatDenseGetArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4811 for (j=0;j<pcbddc->benign_n;j++) { 4812 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4813 for (i=0;i<pcbddc->local_primal_size;i++) { 4814 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4815 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4816 } 4817 } 4818 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4819 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4820 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4821 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4822 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4823 } 4824 #if 0 4825 { 4826 PetscViewer viewer; 4827 char filename[256]; 4828 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4829 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4830 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4831 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4832 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4833 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4834 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4835 if (pcbddc->coarse_phi_B) { 4836 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4837 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4838 } 4839 if (pcbddc->coarse_phi_D) { 4840 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4841 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4842 } 4843 if (pcbddc->coarse_psi_B) { 4844 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4845 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4846 } 4847 if (pcbddc->coarse_psi_D) { 4848 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4849 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4850 } 4851 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4852 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4853 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4854 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4855 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4856 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4857 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4858 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4859 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4860 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4861 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4862 } 4863 #endif 4864 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4865 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4866 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4867 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4868 4869 /* check constraints */ 4870 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4871 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4872 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4873 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4874 } else { 4875 PetscScalar *data; 4876 Mat tmat; 4877 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4878 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4879 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4880 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4881 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4882 } 4883 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4884 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4885 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4886 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4887 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4888 if (!pcbddc->symmetric_primal) { 4889 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4890 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4891 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4892 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4893 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4894 } 4895 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4896 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4897 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4898 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4899 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4900 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4901 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4902 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4903 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4904 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4905 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4906 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4907 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4908 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4909 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4910 if (!pcbddc->symmetric_primal) { 4911 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4912 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4913 } 4914 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4915 } 4916 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4917 { 4918 PetscBool gpu; 4919 4920 ierr = PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);CHKERRQ(ierr); 4921 if (gpu) { 4922 if (pcbddc->local_auxmat1) { 4923 ierr = MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4924 } 4925 if (pcbddc->local_auxmat2) { 4926 ierr = MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4927 } 4928 if (pcbddc->coarse_phi_B) { 4929 ierr = MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4930 } 4931 if (pcbddc->coarse_phi_D) { 4932 ierr = MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4933 } 4934 if (pcbddc->coarse_psi_B) { 4935 ierr = MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4936 } 4937 if (pcbddc->coarse_psi_D) { 4938 ierr = MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4939 } 4940 } 4941 } 4942 /* get back data */ 4943 *coarse_submat_vals_n = coarse_submat_vals; 4944 PetscFunctionReturn(0); 4945 } 4946 4947 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4948 { 4949 Mat *work_mat; 4950 IS isrow_s,iscol_s; 4951 PetscBool rsorted,csorted; 4952 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4953 PetscErrorCode ierr; 4954 4955 PetscFunctionBegin; 4956 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4957 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4958 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4959 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4960 4961 if (!rsorted) { 4962 const PetscInt *idxs; 4963 PetscInt *idxs_sorted,i; 4964 4965 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4966 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4967 for (i=0;i<rsize;i++) { 4968 idxs_perm_r[i] = i; 4969 } 4970 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4971 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4972 for (i=0;i<rsize;i++) { 4973 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4974 } 4975 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4976 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4977 } else { 4978 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4979 isrow_s = isrow; 4980 } 4981 4982 if (!csorted) { 4983 if (isrow == iscol) { 4984 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4985 iscol_s = isrow_s; 4986 } else { 4987 const PetscInt *idxs; 4988 PetscInt *idxs_sorted,i; 4989 4990 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4991 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4992 for (i=0;i<csize;i++) { 4993 idxs_perm_c[i] = i; 4994 } 4995 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4996 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4997 for (i=0;i<csize;i++) { 4998 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4999 } 5000 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 5001 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 5002 } 5003 } else { 5004 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 5005 iscol_s = iscol; 5006 } 5007 5008 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5009 5010 if (!rsorted || !csorted) { 5011 Mat new_mat; 5012 IS is_perm_r,is_perm_c; 5013 5014 if (!rsorted) { 5015 PetscInt *idxs_r,i; 5016 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 5017 for (i=0;i<rsize;i++) { 5018 idxs_r[idxs_perm_r[i]] = i; 5019 } 5020 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 5021 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 5022 } else { 5023 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 5024 } 5025 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 5026 5027 if (!csorted) { 5028 if (isrow_s == iscol_s) { 5029 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 5030 is_perm_c = is_perm_r; 5031 } else { 5032 PetscInt *idxs_c,i; 5033 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5034 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 5035 for (i=0;i<csize;i++) { 5036 idxs_c[idxs_perm_c[i]] = i; 5037 } 5038 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 5039 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 5040 } 5041 } else { 5042 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 5043 } 5044 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 5045 5046 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 5047 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 5048 work_mat[0] = new_mat; 5049 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 5050 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 5051 } 5052 5053 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 5054 *B = work_mat[0]; 5055 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 5056 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 5057 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 5058 PetscFunctionReturn(0); 5059 } 5060 5061 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5062 { 5063 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5064 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5065 Mat new_mat,lA; 5066 IS is_local,is_global; 5067 PetscInt local_size; 5068 PetscBool isseqaij; 5069 PetscErrorCode ierr; 5070 5071 PetscFunctionBegin; 5072 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5073 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 5074 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 5075 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 5076 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 5077 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 5078 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5079 5080 if (pcbddc->dbg_flag) { 5081 Vec x,x_change; 5082 PetscReal error; 5083 5084 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 5085 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5086 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 5087 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5088 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5089 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 5090 if (!pcbddc->change_interior) { 5091 const PetscScalar *x,*y,*v; 5092 PetscReal lerror = 0.; 5093 PetscInt i; 5094 5095 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 5096 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 5097 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 5098 for (i=0;i<local_size;i++) 5099 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5100 lerror = PetscAbsScalar(x[i]-y[i]); 5101 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 5102 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 5103 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 5104 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 5105 if (error > PETSC_SMALL) { 5106 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5107 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 5108 } else { 5109 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 5110 } 5111 } 5112 } 5113 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5114 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5115 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5116 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5117 if (error > PETSC_SMALL) { 5118 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5119 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 5120 } else { 5121 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 5122 } 5123 } 5124 ierr = VecDestroy(&x);CHKERRQ(ierr); 5125 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5126 } 5127 5128 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5129 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 5130 5131 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5132 ierr = PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5133 if (isseqaij) { 5134 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5135 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5136 if (lA) { 5137 Mat work; 5138 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5139 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5140 ierr = MatDestroy(&work);CHKERRQ(ierr); 5141 } 5142 } else { 5143 Mat work_mat; 5144 5145 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5146 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5147 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5148 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 5149 if (lA) { 5150 Mat work; 5151 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5152 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5153 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5154 ierr = MatDestroy(&work);CHKERRQ(ierr); 5155 } 5156 } 5157 if (matis->A->symmetric_set) { 5158 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 5159 #if !defined(PETSC_USE_COMPLEX) 5160 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 5161 #endif 5162 } 5163 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 5164 PetscFunctionReturn(0); 5165 } 5166 5167 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5168 { 5169 PC_IS* pcis = (PC_IS*)(pc->data); 5170 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5171 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5172 PetscInt *idx_R_local=NULL; 5173 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5174 PetscInt vbs,bs; 5175 PetscBT bitmask=NULL; 5176 PetscErrorCode ierr; 5177 5178 PetscFunctionBegin; 5179 /* 5180 No need to setup local scatters if 5181 - primal space is unchanged 5182 AND 5183 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5184 AND 5185 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5186 */ 5187 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5188 PetscFunctionReturn(0); 5189 } 5190 /* destroy old objects */ 5191 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5192 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5193 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5194 /* Set Non-overlapping dimensions */ 5195 n_B = pcis->n_B; 5196 n_D = pcis->n - n_B; 5197 n_vertices = pcbddc->n_vertices; 5198 5199 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5200 5201 /* create auxiliary bitmask and allocate workspace */ 5202 if (!sub_schurs || !sub_schurs->reuse_solver) { 5203 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5204 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5205 for (i=0;i<n_vertices;i++) { 5206 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5207 } 5208 5209 for (i=0, n_R=0; i<pcis->n; i++) { 5210 if (!PetscBTLookup(bitmask,i)) { 5211 idx_R_local[n_R++] = i; 5212 } 5213 } 5214 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5215 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5216 5217 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5218 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5219 } 5220 5221 /* Block code */ 5222 vbs = 1; 5223 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5224 if (bs>1 && !(n_vertices%bs)) { 5225 PetscBool is_blocked = PETSC_TRUE; 5226 PetscInt *vary; 5227 if (!sub_schurs || !sub_schurs->reuse_solver) { 5228 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5229 ierr = PetscArrayzero(vary,pcis->n/bs);CHKERRQ(ierr); 5230 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5231 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5232 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5233 for (i=0; i<pcis->n/bs; i++) { 5234 if (vary[i]!=0 && vary[i]!=bs) { 5235 is_blocked = PETSC_FALSE; 5236 break; 5237 } 5238 } 5239 ierr = PetscFree(vary);CHKERRQ(ierr); 5240 } else { 5241 /* Verify directly the R set */ 5242 for (i=0; i<n_R/bs; i++) { 5243 PetscInt j,node=idx_R_local[bs*i]; 5244 for (j=1; j<bs; j++) { 5245 if (node != idx_R_local[bs*i+j]-j) { 5246 is_blocked = PETSC_FALSE; 5247 break; 5248 } 5249 } 5250 } 5251 } 5252 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5253 vbs = bs; 5254 for (i=0;i<n_R/vbs;i++) { 5255 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5256 } 5257 } 5258 } 5259 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5260 if (sub_schurs && sub_schurs->reuse_solver) { 5261 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5262 5263 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5264 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5265 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5266 reuse_solver->is_R = pcbddc->is_R_local; 5267 } else { 5268 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5269 } 5270 5271 /* print some info if requested */ 5272 if (pcbddc->dbg_flag) { 5273 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5274 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5275 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5276 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5278 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5279 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5280 } 5281 5282 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5283 if (!sub_schurs || !sub_schurs->reuse_solver) { 5284 IS is_aux1,is_aux2; 5285 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5286 5287 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5288 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5289 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5290 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5291 for (i=0; i<n_D; i++) { 5292 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5293 } 5294 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5295 for (i=0, j=0; i<n_R; i++) { 5296 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5297 aux_array1[j++] = i; 5298 } 5299 } 5300 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5301 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5302 for (i=0, j=0; i<n_B; i++) { 5303 if (!PetscBTLookup(bitmask,is_indices[i])) { 5304 aux_array2[j++] = i; 5305 } 5306 } 5307 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5308 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5309 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5310 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5311 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5312 5313 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5314 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5315 for (i=0, j=0; i<n_R; i++) { 5316 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5317 aux_array1[j++] = i; 5318 } 5319 } 5320 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5321 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5322 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5323 } 5324 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5325 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5326 } else { 5327 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5328 IS tis; 5329 PetscInt schur_size; 5330 5331 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5332 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5333 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5334 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5335 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5336 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5337 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5338 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5339 } 5340 } 5341 PetscFunctionReturn(0); 5342 } 5343 5344 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5345 { 5346 MatNullSpace NullSpace; 5347 Mat dmat; 5348 const Vec *nullvecs; 5349 Vec v,v2,*nullvecs2; 5350 VecScatter sct = NULL; 5351 PetscContainer c; 5352 PetscScalar *ddata; 5353 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5354 PetscBool nnsp_has_cnst; 5355 PetscErrorCode ierr; 5356 5357 PetscFunctionBegin; 5358 if (!is && !B) { /* MATIS */ 5359 Mat_IS* matis = (Mat_IS*)A->data; 5360 5361 if (!B) { 5362 ierr = MatISGetLocalMat(A,&B);CHKERRQ(ierr); 5363 } 5364 sct = matis->cctx; 5365 ierr = PetscObjectReference((PetscObject)sct);CHKERRQ(ierr); 5366 } else { 5367 ierr = MatGetNullSpace(B,&NullSpace);CHKERRQ(ierr); 5368 if (!NullSpace) { 5369 ierr = MatGetNearNullSpace(B,&NullSpace);CHKERRQ(ierr); 5370 } 5371 if (NullSpace) PetscFunctionReturn(0); 5372 } 5373 ierr = MatGetNullSpace(A,&NullSpace);CHKERRQ(ierr); 5374 if (!NullSpace) { 5375 ierr = MatGetNearNullSpace(A,&NullSpace);CHKERRQ(ierr); 5376 } 5377 if (!NullSpace) PetscFunctionReturn(0); 5378 5379 ierr = MatCreateVecs(A,&v,NULL);CHKERRQ(ierr); 5380 ierr = MatCreateVecs(B,&v2,NULL);CHKERRQ(ierr); 5381 if (!sct) { 5382 ierr = VecScatterCreate(v,is,v2,NULL,&sct);CHKERRQ(ierr); 5383 } 5384 ierr = MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);CHKERRQ(ierr); 5385 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5386 ierr = PetscMalloc1(bsiz,&nullvecs2);CHKERRQ(ierr); 5387 ierr = VecGetBlockSize(v2,&bs);CHKERRQ(ierr); 5388 ierr = VecGetSize(v2,&N);CHKERRQ(ierr); 5389 ierr = VecGetLocalSize(v2,&n);CHKERRQ(ierr); 5390 ierr = PetscMalloc1(n*bsiz,&ddata);CHKERRQ(ierr); 5391 for (k=0;k<nnsp_size;k++) { 5392 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);CHKERRQ(ierr); 5393 ierr = VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5394 ierr = VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5395 } 5396 if (nnsp_has_cnst) { 5397 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);CHKERRQ(ierr); 5398 ierr = VecSet(nullvecs2[nnsp_size],1.0);CHKERRQ(ierr); 5399 } 5400 ierr = PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);CHKERRQ(ierr); 5401 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);CHKERRQ(ierr); 5402 5403 ierr = MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);CHKERRQ(ierr); 5404 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);CHKERRQ(ierr); 5405 ierr = PetscContainerSetPointer(c,ddata);CHKERRQ(ierr); 5406 ierr = PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);CHKERRQ(ierr); 5407 ierr = PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);CHKERRQ(ierr); 5408 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 5409 ierr = PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);CHKERRQ(ierr); 5410 ierr = MatDestroy(&dmat);CHKERRQ(ierr); 5411 5412 for (k=0;k<bsiz;k++) { 5413 ierr = VecDestroy(&nullvecs2[k]);CHKERRQ(ierr); 5414 } 5415 ierr = PetscFree(nullvecs2);CHKERRQ(ierr); 5416 ierr = MatSetNearNullSpace(B,NullSpace);CHKERRQ(ierr); 5417 ierr = MatNullSpaceDestroy(&NullSpace);CHKERRQ(ierr); 5418 ierr = VecDestroy(&v);CHKERRQ(ierr); 5419 ierr = VecDestroy(&v2);CHKERRQ(ierr); 5420 ierr = VecScatterDestroy(&sct);CHKERRQ(ierr); 5421 PetscFunctionReturn(0); 5422 } 5423 5424 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5425 { 5426 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5427 PC_IS *pcis = (PC_IS*)pc->data; 5428 PC pc_temp; 5429 Mat A_RR; 5430 MatNullSpace nnsp; 5431 MatReuse reuse; 5432 PetscScalar m_one = -1.0; 5433 PetscReal value; 5434 PetscInt n_D,n_R; 5435 PetscBool issbaij,opts; 5436 PetscErrorCode ierr; 5437 void (*f)(void) = NULL; 5438 char dir_prefix[256],neu_prefix[256],str_level[16]; 5439 size_t len; 5440 5441 PetscFunctionBegin; 5442 ierr = PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5443 /* approximate solver, propagate NearNullSpace if needed */ 5444 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5445 MatNullSpace gnnsp1,gnnsp2; 5446 PetscBool lhas,ghas; 5447 5448 ierr = MatGetNearNullSpace(pcbddc->local_mat,&nnsp);CHKERRQ(ierr); 5449 ierr = MatGetNearNullSpace(pc->pmat,&gnnsp1);CHKERRQ(ierr); 5450 ierr = MatGetNullSpace(pc->pmat,&gnnsp2);CHKERRQ(ierr); 5451 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5452 ierr = MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 5453 if (!ghas && (gnnsp1 || gnnsp2)) { 5454 ierr = MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);CHKERRQ(ierr); 5455 } 5456 } 5457 5458 /* compute prefixes */ 5459 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5460 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5461 if (!pcbddc->current_level) { 5462 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5463 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5464 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5465 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5466 } else { 5467 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5468 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5469 len -= 15; /* remove "pc_bddc_coarse_" */ 5470 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5471 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5472 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5473 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5474 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5475 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5476 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5477 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5478 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5479 } 5480 5481 /* DIRICHLET PROBLEM */ 5482 if (dirichlet) { 5483 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5484 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5485 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5486 if (pcbddc->dbg_flag) { 5487 Mat A_IIn; 5488 5489 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5490 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5491 pcis->A_II = A_IIn; 5492 } 5493 } 5494 if (pcbddc->local_mat->symmetric_set) { 5495 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5496 } 5497 /* Matrix for Dirichlet problem is pcis->A_II */ 5498 n_D = pcis->n - pcis->n_B; 5499 opts = PETSC_FALSE; 5500 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5501 opts = PETSC_TRUE; 5502 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5503 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5504 /* default */ 5505 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5506 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5507 ierr = PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5508 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5509 if (issbaij) { 5510 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5511 } else { 5512 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5513 } 5514 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr); 5515 } 5516 ierr = MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);CHKERRQ(ierr); 5517 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);CHKERRQ(ierr); 5518 /* Allow user's customization */ 5519 if (opts) { 5520 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5521 } 5522 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5523 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5524 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);CHKERRQ(ierr); 5525 } 5526 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5527 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5528 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5529 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5530 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5531 const PetscInt *idxs; 5532 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5533 5534 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5535 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5536 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5537 for (i=0;i<nl;i++) { 5538 for (d=0;d<cdim;d++) { 5539 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5540 } 5541 } 5542 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5543 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5544 ierr = PetscFree(scoords);CHKERRQ(ierr); 5545 } 5546 if (sub_schurs && sub_schurs->reuse_solver) { 5547 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5548 5549 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5550 } 5551 5552 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5553 if (!n_D) { 5554 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5555 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5556 } 5557 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5558 /* set ksp_D into pcis data */ 5559 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5560 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5561 pcis->ksp_D = pcbddc->ksp_D; 5562 } 5563 5564 /* NEUMANN PROBLEM */ 5565 A_RR = NULL; 5566 if (neumann) { 5567 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5568 PetscInt ibs,mbs; 5569 PetscBool issbaij, reuse_neumann_solver; 5570 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5571 5572 reuse_neumann_solver = PETSC_FALSE; 5573 if (sub_schurs && sub_schurs->reuse_solver) { 5574 IS iP; 5575 5576 reuse_neumann_solver = PETSC_TRUE; 5577 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5578 if (iP) reuse_neumann_solver = PETSC_FALSE; 5579 } 5580 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5581 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5582 if (pcbddc->ksp_R) { /* already created ksp */ 5583 PetscInt nn_R; 5584 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5585 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5586 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5587 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5588 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5589 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5590 reuse = MAT_INITIAL_MATRIX; 5591 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5592 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5593 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5594 reuse = MAT_INITIAL_MATRIX; 5595 } else { /* safe to reuse the matrix */ 5596 reuse = MAT_REUSE_MATRIX; 5597 } 5598 } 5599 /* last check */ 5600 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5601 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5602 reuse = MAT_INITIAL_MATRIX; 5603 } 5604 } else { /* first time, so we need to create the matrix */ 5605 reuse = MAT_INITIAL_MATRIX; 5606 } 5607 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5608 TODO: Get Rid of these conversions */ 5609 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5610 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5611 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5612 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5613 if (matis->A == pcbddc->local_mat) { 5614 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5615 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5616 } else { 5617 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5618 } 5619 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5620 if (matis->A == pcbddc->local_mat) { 5621 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5622 ierr = MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5623 } else { 5624 ierr = MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5625 } 5626 } 5627 /* extract A_RR */ 5628 if (reuse_neumann_solver) { 5629 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5630 5631 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5632 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5633 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5634 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5635 } else { 5636 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5637 } 5638 } else { 5639 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5640 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5641 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5642 } 5643 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5644 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5645 } 5646 if (pcbddc->local_mat->symmetric_set) { 5647 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5648 } 5649 opts = PETSC_FALSE; 5650 if (!pcbddc->ksp_R) { /* create object if not present */ 5651 opts = PETSC_TRUE; 5652 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5653 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5654 /* default */ 5655 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5656 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5657 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5658 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5659 if (issbaij) { 5660 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5661 } else { 5662 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5663 } 5664 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr); 5665 } 5666 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5667 ierr = MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);CHKERRQ(ierr); 5668 if (opts) { /* Allow user's customization once */ 5669 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5670 } 5671 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5672 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5673 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);CHKERRQ(ierr); 5674 } 5675 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5676 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5677 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5678 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5679 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5680 const PetscInt *idxs; 5681 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5682 5683 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5684 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5685 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5686 for (i=0;i<nl;i++) { 5687 for (d=0;d<cdim;d++) { 5688 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5689 } 5690 } 5691 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5692 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5693 ierr = PetscFree(scoords);CHKERRQ(ierr); 5694 } 5695 5696 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5697 if (!n_R) { 5698 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5699 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5700 } 5701 /* Reuse solver if it is present */ 5702 if (reuse_neumann_solver) { 5703 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5704 5705 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5706 } 5707 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5708 } 5709 5710 if (pcbddc->dbg_flag) { 5711 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5712 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5713 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5714 } 5715 ierr = PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5716 5717 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5718 if (pcbddc->NullSpace_corr[0]) { 5719 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5720 } 5721 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5722 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5723 } 5724 if (neumann && pcbddc->NullSpace_corr[2]) { 5725 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5726 } 5727 /* check Dirichlet and Neumann solvers */ 5728 if (pcbddc->dbg_flag) { 5729 if (dirichlet) { /* Dirichlet */ 5730 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5731 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5732 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5733 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 5734 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5735 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5736 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5737 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5738 } 5739 if (neumann) { /* Neumann */ 5740 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5741 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5742 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5743 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 5744 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5745 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5746 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5747 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5748 } 5749 } 5750 /* free Neumann problem's matrix */ 5751 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5752 PetscFunctionReturn(0); 5753 } 5754 5755 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5756 { 5757 PetscErrorCode ierr; 5758 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5759 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5760 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5761 5762 PetscFunctionBegin; 5763 if (!reuse_solver) { 5764 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5765 } 5766 if (!pcbddc->switch_static) { 5767 if (applytranspose && pcbddc->local_auxmat1) { 5768 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5769 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5770 } 5771 if (!reuse_solver) { 5772 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5773 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5774 } else { 5775 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5776 5777 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5778 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5779 } 5780 } else { 5781 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5782 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5783 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5784 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5785 if (applytranspose && pcbddc->local_auxmat1) { 5786 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5787 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5788 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5789 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5790 } 5791 } 5792 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);CHKERRQ(ierr); 5793 if (!reuse_solver || pcbddc->switch_static) { 5794 if (applytranspose) { 5795 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5796 } else { 5797 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5798 } 5799 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);CHKERRQ(ierr); 5800 } else { 5801 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5802 5803 if (applytranspose) { 5804 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5805 } else { 5806 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5807 } 5808 } 5809 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);CHKERRQ(ierr); 5810 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5811 if (!pcbddc->switch_static) { 5812 if (!reuse_solver) { 5813 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5814 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5815 } else { 5816 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5817 5818 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5819 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5820 } 5821 if (!applytranspose && pcbddc->local_auxmat1) { 5822 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5823 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5824 } 5825 } else { 5826 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5827 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5828 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5829 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5830 if (!applytranspose && pcbddc->local_auxmat1) { 5831 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5832 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5833 } 5834 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5835 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5836 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5837 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5838 } 5839 PetscFunctionReturn(0); 5840 } 5841 5842 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5843 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5844 { 5845 PetscErrorCode ierr; 5846 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5847 PC_IS* pcis = (PC_IS*) (pc->data); 5848 const PetscScalar zero = 0.0; 5849 5850 PetscFunctionBegin; 5851 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5852 if (!pcbddc->benign_apply_coarse_only) { 5853 if (applytranspose) { 5854 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5855 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5856 } else { 5857 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5858 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5859 } 5860 } else { 5861 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5862 } 5863 5864 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5865 if (pcbddc->benign_n) { 5866 PetscScalar *array; 5867 PetscInt j; 5868 5869 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5870 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5871 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5872 } 5873 5874 /* start communications from local primal nodes to rhs of coarse solver */ 5875 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5876 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5877 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5878 5879 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5880 if (pcbddc->coarse_ksp) { 5881 Mat coarse_mat; 5882 Vec rhs,sol; 5883 MatNullSpace nullsp; 5884 PetscBool isbddc = PETSC_FALSE; 5885 5886 if (pcbddc->benign_have_null) { 5887 PC coarse_pc; 5888 5889 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5890 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5891 /* we need to propagate to coarser levels the need for a possible benign correction */ 5892 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5893 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5894 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5895 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5896 } 5897 } 5898 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5899 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5900 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5901 if (applytranspose) { 5902 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5903 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5904 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5905 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5906 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5907 ierr = MatGetTransposeNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5908 if (nullsp) { 5909 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5910 } 5911 } else { 5912 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5913 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5914 PC coarse_pc; 5915 5916 if (nullsp) { 5917 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5918 } 5919 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5920 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5921 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5922 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5923 } else { 5924 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5925 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5926 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5927 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5928 if (nullsp) { 5929 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5930 } 5931 } 5932 } 5933 /* we don't need the benign correction at coarser levels anymore */ 5934 if (pcbddc->benign_have_null && isbddc) { 5935 PC coarse_pc; 5936 PC_BDDC* coarsepcbddc; 5937 5938 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5939 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5940 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5941 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5942 } 5943 } 5944 5945 /* Local solution on R nodes */ 5946 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5947 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5948 } 5949 /* communications from coarse sol to local primal nodes */ 5950 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5951 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5952 5953 /* Sum contributions from the two levels */ 5954 if (!pcbddc->benign_apply_coarse_only) { 5955 if (applytranspose) { 5956 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5957 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5958 } else { 5959 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5960 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5961 } 5962 /* store p0 */ 5963 if (pcbddc->benign_n) { 5964 PetscScalar *array; 5965 PetscInt j; 5966 5967 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5968 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5969 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5970 } 5971 } else { /* expand the coarse solution */ 5972 if (applytranspose) { 5973 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5974 } else { 5975 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5976 } 5977 } 5978 PetscFunctionReturn(0); 5979 } 5980 5981 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5982 { 5983 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5984 Vec from,to; 5985 const PetscScalar *array; 5986 PetscErrorCode ierr; 5987 5988 PetscFunctionBegin; 5989 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5990 from = pcbddc->coarse_vec; 5991 to = pcbddc->vec1_P; 5992 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5993 Vec tvec; 5994 5995 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5996 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5997 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5998 ierr = VecGetArrayRead(tvec,&array);CHKERRQ(ierr); 5999 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 6000 ierr = VecRestoreArrayRead(tvec,&array);CHKERRQ(ierr); 6001 } 6002 } else { /* from local to global -> put data in coarse right hand side */ 6003 from = pcbddc->vec1_P; 6004 to = pcbddc->coarse_vec; 6005 } 6006 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 6007 PetscFunctionReturn(0); 6008 } 6009 6010 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6011 { 6012 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 6013 Vec from,to; 6014 const PetscScalar *array; 6015 PetscErrorCode ierr; 6016 6017 PetscFunctionBegin; 6018 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6019 from = pcbddc->coarse_vec; 6020 to = pcbddc->vec1_P; 6021 } else { /* from local to global -> put data in coarse right hand side */ 6022 from = pcbddc->vec1_P; 6023 to = pcbddc->coarse_vec; 6024 } 6025 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 6026 if (smode == SCATTER_FORWARD) { 6027 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6028 Vec tvec; 6029 6030 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 6031 ierr = VecGetArrayRead(to,&array);CHKERRQ(ierr); 6032 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 6033 ierr = VecRestoreArrayRead(to,&array);CHKERRQ(ierr); 6034 } 6035 } else { 6036 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6037 ierr = VecResetArray(from);CHKERRQ(ierr); 6038 } 6039 } 6040 PetscFunctionReturn(0); 6041 } 6042 6043 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6044 { 6045 PetscErrorCode ierr; 6046 PC_IS* pcis = (PC_IS*)(pc->data); 6047 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6048 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6049 /* one and zero */ 6050 PetscScalar one=1.0,zero=0.0; 6051 /* space to store constraints and their local indices */ 6052 PetscScalar *constraints_data; 6053 PetscInt *constraints_idxs,*constraints_idxs_B; 6054 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6055 PetscInt *constraints_n; 6056 /* iterators */ 6057 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6058 /* BLAS integers */ 6059 PetscBLASInt lwork,lierr; 6060 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6061 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6062 /* reuse */ 6063 PetscInt olocal_primal_size,olocal_primal_size_cc; 6064 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6065 /* change of basis */ 6066 PetscBool qr_needed; 6067 PetscBT change_basis,qr_needed_idx; 6068 /* auxiliary stuff */ 6069 PetscInt *nnz,*is_indices; 6070 PetscInt ncc; 6071 /* some quantities */ 6072 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6073 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6074 PetscReal tol; /* tolerance for retaining eigenmodes */ 6075 6076 PetscFunctionBegin; 6077 tol = PetscSqrtReal(PETSC_SMALL); 6078 /* Destroy Mat objects computed previously */ 6079 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6080 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6081 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 6082 /* save info on constraints from previous setup (if any) */ 6083 olocal_primal_size = pcbddc->local_primal_size; 6084 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6085 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 6086 ierr = PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);CHKERRQ(ierr); 6087 ierr = PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);CHKERRQ(ierr); 6088 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6089 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6090 6091 if (!pcbddc->adaptive_selection) { 6092 IS ISForVertices,*ISForFaces,*ISForEdges; 6093 MatNullSpace nearnullsp; 6094 const Vec *nearnullvecs; 6095 Vec *localnearnullsp; 6096 PetscScalar *array; 6097 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 6098 PetscBool nnsp_has_cnst; 6099 /* LAPACK working arrays for SVD or POD */ 6100 PetscBool skip_lapack,boolforchange; 6101 PetscScalar *work; 6102 PetscReal *singular_vals; 6103 #if defined(PETSC_USE_COMPLEX) 6104 PetscReal *rwork; 6105 #endif 6106 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6107 PetscBLASInt dummy_int=1; 6108 PetscScalar dummy_scalar=1.; 6109 PetscBool use_pod = PETSC_FALSE; 6110 6111 /* MKL SVD with same input gives different results on different processes! */ 6112 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL) 6113 use_pod = PETSC_TRUE; 6114 #endif 6115 /* Get index sets for faces, edges and vertices from graph */ 6116 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 6117 /* print some info */ 6118 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6119 PetscInt nv; 6120 6121 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6122 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 6123 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6124 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6125 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6126 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 6127 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 6128 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6129 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6130 } 6131 6132 /* free unneeded index sets */ 6133 if (!pcbddc->use_vertices) { 6134 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6135 } 6136 if (!pcbddc->use_edges) { 6137 for (i=0;i<n_ISForEdges;i++) { 6138 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6139 } 6140 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6141 n_ISForEdges = 0; 6142 } 6143 if (!pcbddc->use_faces) { 6144 for (i=0;i<n_ISForFaces;i++) { 6145 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6146 } 6147 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6148 n_ISForFaces = 0; 6149 } 6150 6151 /* check if near null space is attached to global mat */ 6152 if (pcbddc->use_nnsp) { 6153 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 6154 } else nearnullsp = NULL; 6155 6156 if (nearnullsp) { 6157 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 6158 /* remove any stored info */ 6159 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 6160 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6161 /* store information for BDDC solver reuse */ 6162 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 6163 pcbddc->onearnullspace = nearnullsp; 6164 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6165 for (i=0;i<nnsp_size;i++) { 6166 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 6167 } 6168 } else { /* if near null space is not provided BDDC uses constants by default */ 6169 nnsp_size = 0; 6170 nnsp_has_cnst = PETSC_TRUE; 6171 } 6172 /* get max number of constraints on a single cc */ 6173 max_constraints = nnsp_size; 6174 if (nnsp_has_cnst) max_constraints++; 6175 6176 /* 6177 Evaluate maximum storage size needed by the procedure 6178 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6179 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6180 There can be multiple constraints per connected component 6181 */ 6182 n_vertices = 0; 6183 if (ISForVertices) { 6184 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 6185 } 6186 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6187 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 6188 6189 total_counts = n_ISForFaces+n_ISForEdges; 6190 total_counts *= max_constraints; 6191 total_counts += n_vertices; 6192 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 6193 6194 total_counts = 0; 6195 max_size_of_constraint = 0; 6196 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6197 IS used_is; 6198 if (i<n_ISForEdges) { 6199 used_is = ISForEdges[i]; 6200 } else { 6201 used_is = ISForFaces[i-n_ISForEdges]; 6202 } 6203 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 6204 total_counts += j; 6205 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6206 } 6207 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 6208 6209 /* get local part of global near null space vectors */ 6210 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 6211 for (k=0;k<nnsp_size;k++) { 6212 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 6213 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6214 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6215 } 6216 6217 /* whether or not to skip lapack calls */ 6218 skip_lapack = PETSC_TRUE; 6219 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6220 6221 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6222 if (!skip_lapack) { 6223 PetscScalar temp_work; 6224 6225 if (use_pod) { 6226 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6227 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 6228 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 6229 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 6230 #if defined(PETSC_USE_COMPLEX) 6231 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 6232 #endif 6233 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6234 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6235 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 6236 lwork = -1; 6237 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6238 #if !defined(PETSC_USE_COMPLEX) 6239 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6240 #else 6241 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6242 #endif 6243 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6244 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6245 } else { 6246 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6247 /* SVD */ 6248 PetscInt max_n,min_n; 6249 max_n = max_size_of_constraint; 6250 min_n = max_constraints; 6251 if (max_size_of_constraint < max_constraints) { 6252 min_n = max_size_of_constraint; 6253 max_n = max_constraints; 6254 } 6255 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 6256 #if defined(PETSC_USE_COMPLEX) 6257 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 6258 #endif 6259 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6260 lwork = -1; 6261 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 6262 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 6263 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 6264 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6265 #if !defined(PETSC_USE_COMPLEX) 6266 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6267 #else 6268 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6269 #endif 6270 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6271 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6272 #else 6273 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6274 #endif /* on missing GESVD */ 6275 } 6276 /* Allocate optimal workspace */ 6277 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 6278 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 6279 } 6280 /* Now we can loop on constraining sets */ 6281 total_counts = 0; 6282 constraints_idxs_ptr[0] = 0; 6283 constraints_data_ptr[0] = 0; 6284 /* vertices */ 6285 if (n_vertices) { 6286 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6287 ierr = PetscArraycpy(constraints_idxs,is_indices,n_vertices);CHKERRQ(ierr); 6288 for (i=0;i<n_vertices;i++) { 6289 constraints_n[total_counts] = 1; 6290 constraints_data[total_counts] = 1.0; 6291 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6292 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6293 total_counts++; 6294 } 6295 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6296 n_vertices = total_counts; 6297 } 6298 6299 /* edges and faces */ 6300 total_counts_cc = total_counts; 6301 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6302 IS used_is; 6303 PetscBool idxs_copied = PETSC_FALSE; 6304 6305 if (ncc<n_ISForEdges) { 6306 used_is = ISForEdges[ncc]; 6307 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6308 } else { 6309 used_is = ISForFaces[ncc-n_ISForEdges]; 6310 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6311 } 6312 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6313 6314 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6315 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6316 /* change of basis should not be performed on local periodic nodes */ 6317 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6318 if (nnsp_has_cnst) { 6319 PetscScalar quad_value; 6320 6321 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6322 idxs_copied = PETSC_TRUE; 6323 6324 if (!pcbddc->use_nnsp_true) { 6325 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6326 } else { 6327 quad_value = 1.0; 6328 } 6329 for (j=0;j<size_of_constraint;j++) { 6330 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6331 } 6332 temp_constraints++; 6333 total_counts++; 6334 } 6335 for (k=0;k<nnsp_size;k++) { 6336 PetscReal real_value; 6337 PetscScalar *ptr_to_data; 6338 6339 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6340 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6341 for (j=0;j<size_of_constraint;j++) { 6342 ptr_to_data[j] = array[is_indices[j]]; 6343 } 6344 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6345 /* check if array is null on the connected component */ 6346 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6347 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6348 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6349 temp_constraints++; 6350 total_counts++; 6351 if (!idxs_copied) { 6352 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6353 idxs_copied = PETSC_TRUE; 6354 } 6355 } 6356 } 6357 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6358 valid_constraints = temp_constraints; 6359 if (!pcbddc->use_nnsp_true && temp_constraints) { 6360 if (temp_constraints == 1) { /* just normalize the constraint */ 6361 PetscScalar norm,*ptr_to_data; 6362 6363 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6364 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6365 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6366 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6367 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6368 } else { /* perform SVD */ 6369 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6370 6371 if (use_pod) { 6372 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6373 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6374 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6375 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6376 from that computed using LAPACKgesvd 6377 -> This is due to a different computation of eigenvectors in LAPACKheev 6378 -> The quality of the POD-computed basis will be the same */ 6379 ierr = PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);CHKERRQ(ierr); 6380 /* Store upper triangular part of correlation matrix */ 6381 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6382 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6383 for (j=0;j<temp_constraints;j++) { 6384 for (k=0;k<j+1;k++) { 6385 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6386 } 6387 } 6388 /* compute eigenvalues and eigenvectors of correlation matrix */ 6389 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6390 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6391 #if !defined(PETSC_USE_COMPLEX) 6392 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6393 #else 6394 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6395 #endif 6396 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6397 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6398 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6399 j = 0; 6400 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6401 total_counts = total_counts-j; 6402 valid_constraints = temp_constraints-j; 6403 /* scale and copy POD basis into used quadrature memory */ 6404 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6405 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6406 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6407 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6408 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6409 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6410 if (j<temp_constraints) { 6411 PetscInt ii; 6412 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6413 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6414 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6415 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6416 for (k=0;k<temp_constraints-j;k++) { 6417 for (ii=0;ii<size_of_constraint;ii++) { 6418 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6419 } 6420 } 6421 } 6422 } else { 6423 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6424 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6425 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6426 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6427 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6428 #if !defined(PETSC_USE_COMPLEX) 6429 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6430 #else 6431 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6432 #endif 6433 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6434 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6435 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6436 k = temp_constraints; 6437 if (k > size_of_constraint) k = size_of_constraint; 6438 j = 0; 6439 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6440 valid_constraints = k-j; 6441 total_counts = total_counts-temp_constraints+valid_constraints; 6442 #else 6443 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6444 #endif /* on missing GESVD */ 6445 } 6446 } 6447 } 6448 /* update pointers information */ 6449 if (valid_constraints) { 6450 constraints_n[total_counts_cc] = valid_constraints; 6451 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6452 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6453 /* set change_of_basis flag */ 6454 if (boolforchange) { 6455 PetscBTSet(change_basis,total_counts_cc); 6456 } 6457 total_counts_cc++; 6458 } 6459 } 6460 /* free workspace */ 6461 if (!skip_lapack) { 6462 ierr = PetscFree(work);CHKERRQ(ierr); 6463 #if defined(PETSC_USE_COMPLEX) 6464 ierr = PetscFree(rwork);CHKERRQ(ierr); 6465 #endif 6466 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6467 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6468 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6469 } 6470 for (k=0;k<nnsp_size;k++) { 6471 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6472 } 6473 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6474 /* free index sets of faces, edges and vertices */ 6475 for (i=0;i<n_ISForFaces;i++) { 6476 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6477 } 6478 if (n_ISForFaces) { 6479 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6480 } 6481 for (i=0;i<n_ISForEdges;i++) { 6482 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6483 } 6484 if (n_ISForEdges) { 6485 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6486 } 6487 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6488 } else { 6489 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6490 6491 total_counts = 0; 6492 n_vertices = 0; 6493 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6494 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6495 } 6496 max_constraints = 0; 6497 total_counts_cc = 0; 6498 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6499 total_counts += pcbddc->adaptive_constraints_n[i]; 6500 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6501 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6502 } 6503 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6504 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6505 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6506 constraints_data = pcbddc->adaptive_constraints_data; 6507 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6508 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6509 total_counts_cc = 0; 6510 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6511 if (pcbddc->adaptive_constraints_n[i]) { 6512 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6513 } 6514 } 6515 6516 max_size_of_constraint = 0; 6517 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6518 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6519 /* Change of basis */ 6520 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6521 if (pcbddc->use_change_of_basis) { 6522 for (i=0;i<sub_schurs->n_subs;i++) { 6523 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6524 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6525 } 6526 } 6527 } 6528 } 6529 pcbddc->local_primal_size = total_counts; 6530 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6531 6532 /* map constraints_idxs in boundary numbering */ 6533 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6534 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6535 6536 /* Create constraint matrix */ 6537 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6538 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6539 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6540 6541 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6542 /* determine if a QR strategy is needed for change of basis */ 6543 qr_needed = pcbddc->use_qr_single; 6544 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6545 total_primal_vertices=0; 6546 pcbddc->local_primal_size_cc = 0; 6547 for (i=0;i<total_counts_cc;i++) { 6548 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6549 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6550 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6551 pcbddc->local_primal_size_cc += 1; 6552 } else if (PetscBTLookup(change_basis,i)) { 6553 for (k=0;k<constraints_n[i];k++) { 6554 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6555 } 6556 pcbddc->local_primal_size_cc += constraints_n[i]; 6557 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6558 PetscBTSet(qr_needed_idx,i); 6559 qr_needed = PETSC_TRUE; 6560 } 6561 } else { 6562 pcbddc->local_primal_size_cc += 1; 6563 } 6564 } 6565 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6566 pcbddc->n_vertices = total_primal_vertices; 6567 /* permute indices in order to have a sorted set of vertices */ 6568 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6569 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6570 ierr = PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);CHKERRQ(ierr); 6571 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6572 6573 /* nonzero structure of constraint matrix */ 6574 /* and get reference dof for local constraints */ 6575 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6576 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6577 6578 j = total_primal_vertices; 6579 total_counts = total_primal_vertices; 6580 cum = total_primal_vertices; 6581 for (i=n_vertices;i<total_counts_cc;i++) { 6582 if (!PetscBTLookup(change_basis,i)) { 6583 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6584 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6585 cum++; 6586 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6587 for (k=0;k<constraints_n[i];k++) { 6588 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6589 nnz[j+k] = size_of_constraint; 6590 } 6591 j += constraints_n[i]; 6592 } 6593 } 6594 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6595 ierr = MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6596 ierr = PetscFree(nnz);CHKERRQ(ierr); 6597 6598 /* set values in constraint matrix */ 6599 for (i=0;i<total_primal_vertices;i++) { 6600 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6601 } 6602 total_counts = total_primal_vertices; 6603 for (i=n_vertices;i<total_counts_cc;i++) { 6604 if (!PetscBTLookup(change_basis,i)) { 6605 PetscInt *cols; 6606 6607 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6608 cols = constraints_idxs+constraints_idxs_ptr[i]; 6609 for (k=0;k<constraints_n[i];k++) { 6610 PetscInt row = total_counts+k; 6611 PetscScalar *vals; 6612 6613 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6614 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6615 } 6616 total_counts += constraints_n[i]; 6617 } 6618 } 6619 /* assembling */ 6620 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6621 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6622 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6623 6624 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6625 if (pcbddc->use_change_of_basis) { 6626 /* dual and primal dofs on a single cc */ 6627 PetscInt dual_dofs,primal_dofs; 6628 /* working stuff for GEQRF */ 6629 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6630 PetscBLASInt lqr_work; 6631 /* working stuff for UNGQR */ 6632 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6633 PetscBLASInt lgqr_work; 6634 /* working stuff for TRTRS */ 6635 PetscScalar *trs_rhs = NULL; 6636 PetscBLASInt Blas_NRHS; 6637 /* pointers for values insertion into change of basis matrix */ 6638 PetscInt *start_rows,*start_cols; 6639 PetscScalar *start_vals; 6640 /* working stuff for values insertion */ 6641 PetscBT is_primal; 6642 PetscInt *aux_primal_numbering_B; 6643 /* matrix sizes */ 6644 PetscInt global_size,local_size; 6645 /* temporary change of basis */ 6646 Mat localChangeOfBasisMatrix; 6647 /* extra space for debugging */ 6648 PetscScalar *dbg_work = NULL; 6649 6650 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6651 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6652 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6653 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6654 /* nonzeros for local mat */ 6655 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6656 if (!pcbddc->benign_change || pcbddc->fake_change) { 6657 for (i=0;i<pcis->n;i++) nnz[i]=1; 6658 } else { 6659 const PetscInt *ii; 6660 PetscInt n; 6661 PetscBool flg_row; 6662 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6663 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6664 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6665 } 6666 for (i=n_vertices;i<total_counts_cc;i++) { 6667 if (PetscBTLookup(change_basis,i)) { 6668 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6669 if (PetscBTLookup(qr_needed_idx,i)) { 6670 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6671 } else { 6672 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6673 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6674 } 6675 } 6676 } 6677 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6678 ierr = MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6679 ierr = PetscFree(nnz);CHKERRQ(ierr); 6680 /* Set interior change in the matrix */ 6681 if (!pcbddc->benign_change || pcbddc->fake_change) { 6682 for (i=0;i<pcis->n;i++) { 6683 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6684 } 6685 } else { 6686 const PetscInt *ii,*jj; 6687 PetscScalar *aa; 6688 PetscInt n; 6689 PetscBool flg_row; 6690 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6691 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6692 for (i=0;i<n;i++) { 6693 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6694 } 6695 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6696 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6697 } 6698 6699 if (pcbddc->dbg_flag) { 6700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6702 } 6703 6704 /* Now we loop on the constraints which need a change of basis */ 6705 /* 6706 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6707 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6708 6709 Basic blocks of change of basis matrix T computed by 6710 6711 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6712 6713 | 1 0 ... 0 s_1/S | 6714 | 0 1 ... 0 s_2/S | 6715 | ... | 6716 | 0 ... 1 s_{n-1}/S | 6717 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6718 6719 with S = \sum_{i=1}^n s_i^2 6720 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6721 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6722 6723 - QR decomposition of constraints otherwise 6724 */ 6725 if (qr_needed && max_size_of_constraint) { 6726 /* space to store Q */ 6727 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6728 /* array to store scaling factors for reflectors */ 6729 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6730 /* first we issue queries for optimal work */ 6731 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6732 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6733 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6734 lqr_work = -1; 6735 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6736 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6737 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6738 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6739 lgqr_work = -1; 6740 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6741 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6742 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6743 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6744 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6745 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6746 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6747 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6748 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6749 /* array to store rhs and solution of triangular solver */ 6750 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6751 /* allocating workspace for check */ 6752 if (pcbddc->dbg_flag) { 6753 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6754 } 6755 } 6756 /* array to store whether a node is primal or not */ 6757 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6758 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6759 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6760 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6761 for (i=0;i<total_primal_vertices;i++) { 6762 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6763 } 6764 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6765 6766 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6767 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6768 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6769 if (PetscBTLookup(change_basis,total_counts)) { 6770 /* get constraint info */ 6771 primal_dofs = constraints_n[total_counts]; 6772 dual_dofs = size_of_constraint-primal_dofs; 6773 6774 if (pcbddc->dbg_flag) { 6775 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6776 } 6777 6778 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6779 6780 /* copy quadrature constraints for change of basis check */ 6781 if (pcbddc->dbg_flag) { 6782 ierr = PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6783 } 6784 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6785 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6786 6787 /* compute QR decomposition of constraints */ 6788 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6789 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6790 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6791 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6792 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6793 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6794 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6795 6796 /* explicitly compute R^-T */ 6797 ierr = PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);CHKERRQ(ierr); 6798 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6799 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6800 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6801 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6802 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6803 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6804 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6805 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6806 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6807 6808 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6809 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6810 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6811 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6812 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6813 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6814 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6815 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6816 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6817 6818 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6819 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6820 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6821 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6822 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6823 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6824 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6825 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6826 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6827 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6828 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6829 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6830 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6831 6832 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6833 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6834 /* insert cols for primal dofs */ 6835 for (j=0;j<primal_dofs;j++) { 6836 start_vals = &qr_basis[j*size_of_constraint]; 6837 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6838 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6839 } 6840 /* insert cols for dual dofs */ 6841 for (j=0,k=0;j<dual_dofs;k++) { 6842 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6843 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6844 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6845 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6846 j++; 6847 } 6848 } 6849 6850 /* check change of basis */ 6851 if (pcbddc->dbg_flag) { 6852 PetscInt ii,jj; 6853 PetscBool valid_qr=PETSC_TRUE; 6854 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6855 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6856 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6857 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6858 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6859 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6860 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6861 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6862 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6863 for (jj=0;jj<size_of_constraint;jj++) { 6864 for (ii=0;ii<primal_dofs;ii++) { 6865 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6866 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6867 } 6868 } 6869 if (!valid_qr) { 6870 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6871 for (jj=0;jj<size_of_constraint;jj++) { 6872 for (ii=0;ii<primal_dofs;ii++) { 6873 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6874 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6875 } 6876 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6878 } 6879 } 6880 } 6881 } else { 6882 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6883 } 6884 } 6885 } else { /* simple transformation block */ 6886 PetscInt row,col; 6887 PetscScalar val,norm; 6888 6889 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6890 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6891 for (j=0;j<size_of_constraint;j++) { 6892 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6893 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6894 if (!PetscBTLookup(is_primal,row_B)) { 6895 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6896 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6897 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6898 } else { 6899 for (k=0;k<size_of_constraint;k++) { 6900 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6901 if (row != col) { 6902 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6903 } else { 6904 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6905 } 6906 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6907 } 6908 } 6909 } 6910 if (pcbddc->dbg_flag) { 6911 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6912 } 6913 } 6914 } else { 6915 if (pcbddc->dbg_flag) { 6916 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6917 } 6918 } 6919 } 6920 6921 /* free workspace */ 6922 if (qr_needed) { 6923 if (pcbddc->dbg_flag) { 6924 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6925 } 6926 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6927 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6928 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6929 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6930 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6931 } 6932 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6933 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6934 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6935 6936 /* assembling of global change of variable */ 6937 if (!pcbddc->fake_change) { 6938 Mat tmat; 6939 PetscInt bs; 6940 6941 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6942 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6943 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6944 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6945 ierr = MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6946 ierr = MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6947 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6948 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6949 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6950 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6951 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6952 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6953 ierr = MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6954 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6955 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6956 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6957 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6958 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6959 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6960 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6961 6962 /* check */ 6963 if (pcbddc->dbg_flag) { 6964 PetscReal error; 6965 Vec x,x_change; 6966 6967 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6968 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6969 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6970 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6971 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6972 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6973 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6974 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6975 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6976 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6977 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6978 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6979 if (error > PETSC_SMALL) { 6980 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6981 } 6982 ierr = VecDestroy(&x);CHKERRQ(ierr); 6983 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6984 } 6985 /* adapt sub_schurs computed (if any) */ 6986 if (pcbddc->use_deluxe_scaling) { 6987 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6988 6989 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6990 if (sub_schurs && sub_schurs->S_Ej_all) { 6991 Mat S_new,tmat; 6992 IS is_all_N,is_V_Sall = NULL; 6993 6994 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6995 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6996 if (pcbddc->deluxe_zerorows) { 6997 ISLocalToGlobalMapping NtoSall; 6998 IS is_V; 6999 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 7000 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 7001 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 7002 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 7003 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 7004 } 7005 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 7006 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7007 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 7008 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7009 if (pcbddc->deluxe_zerorows) { 7010 const PetscScalar *array; 7011 const PetscInt *idxs_V,*idxs_all; 7012 PetscInt i,n_V; 7013 7014 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7015 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 7016 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7017 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7018 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 7019 for (i=0;i<n_V;i++) { 7020 PetscScalar val; 7021 PetscInt idx; 7022 7023 idx = idxs_V[i]; 7024 val = array[idxs_all[idxs_V[i]]]; 7025 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 7026 } 7027 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7028 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7029 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 7030 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7031 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7032 } 7033 sub_schurs->S_Ej_all = S_new; 7034 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7035 if (sub_schurs->sum_S_Ej_all) { 7036 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7037 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 7038 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7039 if (pcbddc->deluxe_zerorows) { 7040 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7041 } 7042 sub_schurs->sum_S_Ej_all = S_new; 7043 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7044 } 7045 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 7046 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 7047 } 7048 /* destroy any change of basis context in sub_schurs */ 7049 if (sub_schurs && sub_schurs->change) { 7050 PetscInt i; 7051 7052 for (i=0;i<sub_schurs->n_subs;i++) { 7053 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 7054 } 7055 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 7056 } 7057 } 7058 if (pcbddc->switch_static) { /* need to save the local change */ 7059 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7060 } else { 7061 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 7062 } 7063 /* determine if any process has changed the pressures locally */ 7064 pcbddc->change_interior = pcbddc->benign_have_null; 7065 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7066 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 7067 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7068 pcbddc->use_qr_single = qr_needed; 7069 } 7070 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7071 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7072 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 7073 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7074 } else { 7075 Mat benign_global = NULL; 7076 if (pcbddc->benign_have_null) { 7077 Mat M; 7078 7079 pcbddc->change_interior = PETSC_TRUE; 7080 ierr = VecCopy(matis->counter,pcis->vec1_N);CHKERRQ(ierr); 7081 ierr = VecReciprocal(pcis->vec1_N);CHKERRQ(ierr); 7082 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);CHKERRQ(ierr); 7083 if (pcbddc->benign_change) { 7084 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 7085 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 7086 } else { 7087 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);CHKERRQ(ierr); 7088 ierr = MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);CHKERRQ(ierr); 7089 } 7090 ierr = MatISSetLocalMat(benign_global,M);CHKERRQ(ierr); 7091 ierr = MatDestroy(&M);CHKERRQ(ierr); 7092 ierr = MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7093 ierr = MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7094 } 7095 if (pcbddc->user_ChangeOfBasisMatrix) { 7096 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 7097 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 7098 } else if (pcbddc->benign_have_null) { 7099 pcbddc->ChangeOfBasisMatrix = benign_global; 7100 } 7101 } 7102 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7103 IS is_global; 7104 const PetscInt *gidxs; 7105 7106 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7107 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 7108 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7109 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 7110 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 7111 } 7112 } 7113 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7114 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 7115 } 7116 7117 if (!pcbddc->fake_change) { 7118 /* add pressure dofs to set of primal nodes for numbering purposes */ 7119 for (i=0;i<pcbddc->benign_n;i++) { 7120 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7121 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7122 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7123 pcbddc->local_primal_size_cc++; 7124 pcbddc->local_primal_size++; 7125 } 7126 7127 /* check if a new primal space has been introduced (also take into account benign trick) */ 7128 pcbddc->new_primal_space_local = PETSC_TRUE; 7129 if (olocal_primal_size == pcbddc->local_primal_size) { 7130 ierr = PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7131 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7132 if (!pcbddc->new_primal_space_local) { 7133 ierr = PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7134 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7135 } 7136 } 7137 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7138 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 7139 } 7140 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 7141 7142 /* flush dbg viewer */ 7143 if (pcbddc->dbg_flag) { 7144 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7145 } 7146 7147 /* free workspace */ 7148 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 7149 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 7150 if (!pcbddc->adaptive_selection) { 7151 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 7152 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 7153 } else { 7154 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 7155 pcbddc->adaptive_constraints_idxs_ptr, 7156 pcbddc->adaptive_constraints_data_ptr, 7157 pcbddc->adaptive_constraints_idxs, 7158 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 7159 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 7160 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 7161 } 7162 PetscFunctionReturn(0); 7163 } 7164 7165 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7166 { 7167 ISLocalToGlobalMapping map; 7168 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7169 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7170 PetscInt i,N; 7171 PetscBool rcsr = PETSC_FALSE; 7172 PetscErrorCode ierr; 7173 7174 PetscFunctionBegin; 7175 if (pcbddc->recompute_topography) { 7176 pcbddc->graphanalyzed = PETSC_FALSE; 7177 /* Reset previously computed graph */ 7178 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 7179 /* Init local Graph struct */ 7180 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 7181 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 7182 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 7183 7184 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7185 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7186 } 7187 /* Check validity of the csr graph passed in by the user */ 7188 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7189 7190 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7191 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7192 PetscInt *xadj,*adjncy; 7193 PetscInt nvtxs; 7194 PetscBool flg_row=PETSC_FALSE; 7195 7196 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7197 if (flg_row) { 7198 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 7199 pcbddc->computed_rowadj = PETSC_TRUE; 7200 } 7201 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7202 rcsr = PETSC_TRUE; 7203 } 7204 if (pcbddc->dbg_flag) { 7205 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7206 } 7207 7208 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7209 PetscReal *lcoords; 7210 PetscInt n; 7211 MPI_Datatype dimrealtype; 7212 7213 /* TODO: support for blocked */ 7214 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7215 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7216 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 7217 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRMPI(ierr); 7218 ierr = MPI_Type_commit(&dimrealtype);CHKERRMPI(ierr); 7219 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);CHKERRQ(ierr); 7220 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);CHKERRQ(ierr); 7221 ierr = MPI_Type_free(&dimrealtype);CHKERRMPI(ierr); 7222 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 7223 7224 pcbddc->mat_graph->coords = lcoords; 7225 pcbddc->mat_graph->cloc = PETSC_TRUE; 7226 pcbddc->mat_graph->cnloc = n; 7227 } 7228 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7229 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7230 7231 /* Setup of Graph */ 7232 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7233 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7234 7235 /* attach info on disconnected subdomains if present */ 7236 if (pcbddc->n_local_subs) { 7237 PetscInt *local_subs,n,totn; 7238 7239 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7240 ierr = PetscMalloc1(n,&local_subs);CHKERRQ(ierr); 7241 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7242 for (i=0;i<pcbddc->n_local_subs;i++) { 7243 const PetscInt *idxs; 7244 PetscInt nl,j; 7245 7246 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 7247 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7248 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7249 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7250 } 7251 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7252 pcbddc->mat_graph->n_local_subs = totn + 1; 7253 pcbddc->mat_graph->local_subs = local_subs; 7254 } 7255 } 7256 7257 if (!pcbddc->graphanalyzed) { 7258 /* Graph's connected components analysis */ 7259 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 7260 pcbddc->graphanalyzed = PETSC_TRUE; 7261 pcbddc->corner_selected = pcbddc->corner_selection; 7262 } 7263 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7264 PetscFunctionReturn(0); 7265 } 7266 7267 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7268 { 7269 PetscInt i,j,n; 7270 PetscScalar *alphas; 7271 PetscReal norm,*onorms; 7272 PetscErrorCode ierr; 7273 7274 PetscFunctionBegin; 7275 n = *nio; 7276 if (!n) PetscFunctionReturn(0); 7277 ierr = PetscMalloc2(n,&alphas,n,&onorms);CHKERRQ(ierr); 7278 ierr = VecNormalize(vecs[0],&norm);CHKERRQ(ierr); 7279 if (norm < PETSC_SMALL) { 7280 onorms[0] = 0.0; 7281 ierr = VecSet(vecs[0],0.0);CHKERRQ(ierr); 7282 } else { 7283 onorms[0] = norm; 7284 } 7285 7286 for (i=1;i<n;i++) { 7287 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 7288 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7289 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 7290 ierr = VecNormalize(vecs[i],&norm);CHKERRQ(ierr); 7291 if (norm < PETSC_SMALL) { 7292 onorms[i] = 0.0; 7293 ierr = VecSet(vecs[i],0.0);CHKERRQ(ierr); 7294 } else { 7295 onorms[i] = norm; 7296 } 7297 } 7298 /* push nonzero vectors at the beginning */ 7299 for (i=0;i<n;i++) { 7300 if (onorms[i] == 0.0) { 7301 for (j=i+1;j<n;j++) { 7302 if (onorms[j] != 0.0) { 7303 ierr = VecCopy(vecs[j],vecs[i]);CHKERRQ(ierr); 7304 onorms[j] = 0.0; 7305 } 7306 } 7307 } 7308 } 7309 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7310 ierr = PetscFree2(alphas,onorms);CHKERRQ(ierr); 7311 PetscFunctionReturn(0); 7312 } 7313 7314 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7315 { 7316 Mat A; 7317 PetscInt n_neighs,*neighs,*n_shared,**shared; 7318 PetscMPIInt size,rank,color; 7319 PetscInt *xadj,*adjncy; 7320 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7321 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7322 PetscInt void_procs,*procs_candidates = NULL; 7323 PetscInt xadj_count,*count; 7324 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7325 PetscSubcomm psubcomm; 7326 MPI_Comm subcomm; 7327 PetscErrorCode ierr; 7328 7329 PetscFunctionBegin; 7330 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7331 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7332 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7333 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7334 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7335 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7336 7337 if (have_void) *have_void = PETSC_FALSE; 7338 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRMPI(ierr); 7339 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 7340 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7341 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7342 im_active = !!n; 7343 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 7344 void_procs = size - active_procs; 7345 /* get ranks of of non-active processes in mat communicator */ 7346 if (void_procs) { 7347 PetscInt ncand; 7348 7349 if (have_void) *have_void = PETSC_TRUE; 7350 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7351 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 7352 for (i=0,ncand=0;i<size;i++) { 7353 if (!procs_candidates[i]) { 7354 procs_candidates[ncand++] = i; 7355 } 7356 } 7357 /* force n_subdomains to be not greater that the number of non-active processes */ 7358 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7359 } 7360 7361 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7362 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7363 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7364 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7365 PetscInt issize,isidx,dest; 7366 if (*n_subdomains == 1) dest = 0; 7367 else dest = rank; 7368 if (im_active) { 7369 issize = 1; 7370 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7371 isidx = procs_candidates[dest]; 7372 } else { 7373 isidx = dest; 7374 } 7375 } else { 7376 issize = 0; 7377 isidx = -1; 7378 } 7379 if (*n_subdomains != 1) *n_subdomains = active_procs; 7380 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7381 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7382 PetscFunctionReturn(0); 7383 } 7384 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7385 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7386 threshold = PetscMax(threshold,2); 7387 7388 /* Get info on mapping */ 7389 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7390 7391 /* build local CSR graph of subdomains' connectivity */ 7392 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7393 xadj[0] = 0; 7394 xadj[1] = PetscMax(n_neighs-1,0); 7395 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7396 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7397 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7398 for (i=1;i<n_neighs;i++) 7399 for (j=0;j<n_shared[i];j++) 7400 count[shared[i][j]] += 1; 7401 7402 xadj_count = 0; 7403 for (i=1;i<n_neighs;i++) { 7404 for (j=0;j<n_shared[i];j++) { 7405 if (count[shared[i][j]] < threshold) { 7406 adjncy[xadj_count] = neighs[i]; 7407 adjncy_wgt[xadj_count] = n_shared[i]; 7408 xadj_count++; 7409 break; 7410 } 7411 } 7412 } 7413 xadj[1] = xadj_count; 7414 ierr = PetscFree(count);CHKERRQ(ierr); 7415 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7416 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7417 7418 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7419 7420 /* Restrict work on active processes only */ 7421 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7422 if (void_procs) { 7423 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7424 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7425 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7426 subcomm = PetscSubcommChild(psubcomm); 7427 } else { 7428 psubcomm = NULL; 7429 subcomm = PetscObjectComm((PetscObject)mat); 7430 } 7431 7432 v_wgt = NULL; 7433 if (!color) { 7434 ierr = PetscFree(xadj);CHKERRQ(ierr); 7435 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7436 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7437 } else { 7438 Mat subdomain_adj; 7439 IS new_ranks,new_ranks_contig; 7440 MatPartitioning partitioner; 7441 PetscInt rstart=0,rend=0; 7442 PetscInt *is_indices,*oldranks; 7443 PetscMPIInt size; 7444 PetscBool aggregate; 7445 7446 ierr = MPI_Comm_size(subcomm,&size);CHKERRMPI(ierr); 7447 if (void_procs) { 7448 PetscInt prank = rank; 7449 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7450 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRMPI(ierr); 7451 for (i=0;i<xadj[1];i++) { 7452 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7453 } 7454 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7455 } else { 7456 oldranks = NULL; 7457 } 7458 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7459 if (aggregate) { /* TODO: all this part could be made more efficient */ 7460 PetscInt lrows,row,ncols,*cols; 7461 PetscMPIInt nrank; 7462 PetscScalar *vals; 7463 7464 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRMPI(ierr); 7465 lrows = 0; 7466 if (nrank<redprocs) { 7467 lrows = size/redprocs; 7468 if (nrank<size%redprocs) lrows++; 7469 } 7470 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7471 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7472 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7473 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7474 row = nrank; 7475 ncols = xadj[1]-xadj[0]; 7476 cols = adjncy; 7477 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7478 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7479 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7480 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7481 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7482 ierr = PetscFree(xadj);CHKERRQ(ierr); 7483 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7484 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7485 ierr = PetscFree(vals);CHKERRQ(ierr); 7486 if (use_vwgt) { 7487 Vec v; 7488 const PetscScalar *array; 7489 PetscInt nl; 7490 7491 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7492 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7493 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7494 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7495 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7496 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7497 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7498 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7499 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7500 ierr = VecDestroy(&v);CHKERRQ(ierr); 7501 } 7502 } else { 7503 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7504 if (use_vwgt) { 7505 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7506 v_wgt[0] = n; 7507 } 7508 } 7509 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7510 7511 /* Partition */ 7512 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7513 #if defined(PETSC_HAVE_PTSCOTCH) 7514 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);CHKERRQ(ierr); 7515 #elif defined(PETSC_HAVE_PARMETIS) 7516 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);CHKERRQ(ierr); 7517 #else 7518 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);CHKERRQ(ierr); 7519 #endif 7520 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7521 if (v_wgt) { 7522 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7523 } 7524 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7525 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7526 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7527 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7528 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7529 7530 /* renumber new_ranks to avoid "holes" in new set of processors */ 7531 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7532 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7533 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7534 if (!aggregate) { 7535 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7536 if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7537 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7538 } else if (oldranks) { 7539 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7540 } else { 7541 ranks_send_to_idx[0] = is_indices[0]; 7542 } 7543 } else { 7544 PetscInt idx = 0; 7545 PetscMPIInt tag; 7546 MPI_Request *reqs; 7547 7548 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7549 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7550 for (i=rstart;i<rend;i++) { 7551 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRMPI(ierr); 7552 } 7553 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRMPI(ierr); 7554 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 7555 ierr = PetscFree(reqs);CHKERRQ(ierr); 7556 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7557 if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7558 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7559 } else if (oldranks) { 7560 ranks_send_to_idx[0] = oldranks[idx]; 7561 } else { 7562 ranks_send_to_idx[0] = idx; 7563 } 7564 } 7565 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7566 /* clean up */ 7567 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7568 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7569 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7570 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7571 } 7572 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7573 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7574 7575 /* assemble parallel IS for sends */ 7576 i = 1; 7577 if (!color) i=0; 7578 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7579 PetscFunctionReturn(0); 7580 } 7581 7582 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7583 7584 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7585 { 7586 Mat local_mat; 7587 IS is_sends_internal; 7588 PetscInt rows,cols,new_local_rows; 7589 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7590 PetscBool ismatis,isdense,newisdense,destroy_mat; 7591 ISLocalToGlobalMapping l2gmap; 7592 PetscInt* l2gmap_indices; 7593 const PetscInt* is_indices; 7594 MatType new_local_type; 7595 /* buffers */ 7596 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7597 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7598 PetscInt *recv_buffer_idxs_local; 7599 PetscScalar *ptr_vals,*recv_buffer_vals; 7600 const PetscScalar *send_buffer_vals; 7601 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7602 /* MPI */ 7603 MPI_Comm comm,comm_n; 7604 PetscSubcomm subcomm; 7605 PetscMPIInt n_sends,n_recvs,size; 7606 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7607 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7608 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7609 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7610 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7611 PetscErrorCode ierr; 7612 7613 PetscFunctionBegin; 7614 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7615 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7616 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7617 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7618 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7619 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7620 PetscValidLogicalCollectiveBool(mat,reuse,6); 7621 PetscValidLogicalCollectiveInt(mat,nis,8); 7622 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7623 if (nvecs) { 7624 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7625 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7626 } 7627 /* further checks */ 7628 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7629 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7630 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7631 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7632 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7633 if (reuse && *mat_n) { 7634 PetscInt mrows,mcols,mnrows,mncols; 7635 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7636 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7637 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7638 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7639 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7640 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7641 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7642 } 7643 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7644 PetscValidLogicalCollectiveInt(mat,bs,1); 7645 7646 /* prepare IS for sending if not provided */ 7647 if (!is_sends) { 7648 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7649 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7650 } else { 7651 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7652 is_sends_internal = is_sends; 7653 } 7654 7655 /* get comm */ 7656 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7657 7658 /* compute number of sends */ 7659 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7660 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7661 7662 /* compute number of receives */ 7663 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 7664 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 7665 ierr = PetscArrayzero(iflags,size);CHKERRQ(ierr); 7666 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7667 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7668 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7669 ierr = PetscFree(iflags);CHKERRQ(ierr); 7670 7671 /* restrict comm if requested */ 7672 subcomm = NULL; 7673 destroy_mat = PETSC_FALSE; 7674 if (restrict_comm) { 7675 PetscMPIInt color,subcommsize; 7676 7677 color = 0; 7678 if (restrict_full) { 7679 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7680 } else { 7681 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7682 } 7683 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr); 7684 subcommsize = size - subcommsize; 7685 /* check if reuse has been requested */ 7686 if (reuse) { 7687 if (*mat_n) { 7688 PetscMPIInt subcommsize2; 7689 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRMPI(ierr); 7690 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7691 comm_n = PetscObjectComm((PetscObject)*mat_n); 7692 } else { 7693 comm_n = PETSC_COMM_SELF; 7694 } 7695 } else { /* MAT_INITIAL_MATRIX */ 7696 PetscMPIInt rank; 7697 7698 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 7699 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7700 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7701 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7702 comm_n = PetscSubcommChild(subcomm); 7703 } 7704 /* flag to destroy *mat_n if not significative */ 7705 if (color) destroy_mat = PETSC_TRUE; 7706 } else { 7707 comm_n = comm; 7708 } 7709 7710 /* prepare send/receive buffers */ 7711 ierr = PetscMalloc1(size,&ilengths_idxs);CHKERRQ(ierr); 7712 ierr = PetscArrayzero(ilengths_idxs,size);CHKERRQ(ierr); 7713 ierr = PetscMalloc1(size,&ilengths_vals);CHKERRQ(ierr); 7714 ierr = PetscArrayzero(ilengths_vals,size);CHKERRQ(ierr); 7715 if (nis) { 7716 ierr = PetscCalloc1(size,&ilengths_idxs_is);CHKERRQ(ierr); 7717 } 7718 7719 /* Get data from local matrices */ 7720 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7721 /* TODO: See below some guidelines on how to prepare the local buffers */ 7722 /* 7723 send_buffer_vals should contain the raw values of the local matrix 7724 send_buffer_idxs should contain: 7725 - MatType_PRIVATE type 7726 - PetscInt size_of_l2gmap 7727 - PetscInt global_row_indices[size_of_l2gmap] 7728 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7729 */ 7730 else { 7731 ierr = MatDenseGetArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7732 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7733 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7734 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7735 send_buffer_idxs[1] = i; 7736 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7737 ierr = PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);CHKERRQ(ierr); 7738 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7739 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7740 for (i=0;i<n_sends;i++) { 7741 ilengths_vals[is_indices[i]] = len*len; 7742 ilengths_idxs[is_indices[i]] = len+2; 7743 } 7744 } 7745 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7746 /* additional is (if any) */ 7747 if (nis) { 7748 PetscMPIInt psum; 7749 PetscInt j; 7750 for (j=0,psum=0;j<nis;j++) { 7751 PetscInt plen; 7752 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7753 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7754 psum += len+1; /* indices + lenght */ 7755 } 7756 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7757 for (j=0,psum=0;j<nis;j++) { 7758 PetscInt plen; 7759 const PetscInt *is_array_idxs; 7760 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7761 send_buffer_idxs_is[psum] = plen; 7762 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7763 ierr = PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);CHKERRQ(ierr); 7764 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7765 psum += plen+1; /* indices + lenght */ 7766 } 7767 for (i=0;i<n_sends;i++) { 7768 ilengths_idxs_is[is_indices[i]] = psum; 7769 } 7770 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7771 } 7772 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7773 7774 buf_size_idxs = 0; 7775 buf_size_vals = 0; 7776 buf_size_idxs_is = 0; 7777 buf_size_vecs = 0; 7778 for (i=0;i<n_recvs;i++) { 7779 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7780 buf_size_vals += (PetscInt)olengths_vals[i]; 7781 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7782 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7783 } 7784 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7785 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7786 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7787 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7788 7789 /* get new tags for clean communications */ 7790 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7791 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7792 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7793 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7794 7795 /* allocate for requests */ 7796 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7797 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7798 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7799 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7800 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7801 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7802 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7803 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7804 7805 /* communications */ 7806 ptr_idxs = recv_buffer_idxs; 7807 ptr_vals = recv_buffer_vals; 7808 ptr_idxs_is = recv_buffer_idxs_is; 7809 ptr_vecs = recv_buffer_vecs; 7810 for (i=0;i<n_recvs;i++) { 7811 source_dest = onodes[i]; 7812 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRMPI(ierr); 7813 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRMPI(ierr); 7814 ptr_idxs += olengths_idxs[i]; 7815 ptr_vals += olengths_vals[i]; 7816 if (nis) { 7817 source_dest = onodes_is[i]; 7818 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRMPI(ierr); 7819 ptr_idxs_is += olengths_idxs_is[i]; 7820 } 7821 if (nvecs) { 7822 source_dest = onodes[i]; 7823 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRMPI(ierr); 7824 ptr_vecs += olengths_idxs[i]-2; 7825 } 7826 } 7827 for (i=0;i<n_sends;i++) { 7828 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7829 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRMPI(ierr); 7830 ierr = MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRMPI(ierr); 7831 if (nis) { 7832 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRMPI(ierr); 7833 } 7834 if (nvecs) { 7835 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7836 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRMPI(ierr); 7837 } 7838 } 7839 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7840 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7841 7842 /* assemble new l2g map */ 7843 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 7844 ptr_idxs = recv_buffer_idxs; 7845 new_local_rows = 0; 7846 for (i=0;i<n_recvs;i++) { 7847 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7848 ptr_idxs += olengths_idxs[i]; 7849 } 7850 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7851 ptr_idxs = recv_buffer_idxs; 7852 new_local_rows = 0; 7853 for (i=0;i<n_recvs;i++) { 7854 ierr = PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));CHKERRQ(ierr); 7855 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7856 ptr_idxs += olengths_idxs[i]; 7857 } 7858 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7859 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7860 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7861 7862 /* infer new local matrix type from received local matrices type */ 7863 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7864 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7865 if (n_recvs) { 7866 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7867 ptr_idxs = recv_buffer_idxs; 7868 for (i=0;i<n_recvs;i++) { 7869 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7870 new_local_type_private = MATAIJ_PRIVATE; 7871 break; 7872 } 7873 ptr_idxs += olengths_idxs[i]; 7874 } 7875 switch (new_local_type_private) { 7876 case MATDENSE_PRIVATE: 7877 new_local_type = MATSEQAIJ; 7878 bs = 1; 7879 break; 7880 case MATAIJ_PRIVATE: 7881 new_local_type = MATSEQAIJ; 7882 bs = 1; 7883 break; 7884 case MATBAIJ_PRIVATE: 7885 new_local_type = MATSEQBAIJ; 7886 break; 7887 case MATSBAIJ_PRIVATE: 7888 new_local_type = MATSEQSBAIJ; 7889 break; 7890 default: 7891 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7892 } 7893 } else { /* by default, new_local_type is seqaij */ 7894 new_local_type = MATSEQAIJ; 7895 bs = 1; 7896 } 7897 7898 /* create MATIS object if needed */ 7899 if (!reuse) { 7900 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7901 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7902 } else { 7903 /* it also destroys the local matrices */ 7904 if (*mat_n) { 7905 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7906 } else { /* this is a fake object */ 7907 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7908 } 7909 } 7910 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7911 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7912 7913 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 7914 7915 /* Global to local map of received indices */ 7916 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7917 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7918 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7919 7920 /* restore attributes -> type of incoming data and its size */ 7921 buf_size_idxs = 0; 7922 for (i=0;i<n_recvs;i++) { 7923 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7924 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7925 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7926 } 7927 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7928 7929 /* set preallocation */ 7930 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7931 if (!newisdense) { 7932 PetscInt *new_local_nnz=NULL; 7933 7934 ptr_idxs = recv_buffer_idxs_local; 7935 if (n_recvs) { 7936 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7937 } 7938 for (i=0;i<n_recvs;i++) { 7939 PetscInt j; 7940 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7941 for (j=0;j<*(ptr_idxs+1);j++) { 7942 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7943 } 7944 } else { 7945 /* TODO */ 7946 } 7947 ptr_idxs += olengths_idxs[i]; 7948 } 7949 if (new_local_nnz) { 7950 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7951 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7952 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7953 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7954 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7955 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7956 } else { 7957 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7958 } 7959 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7960 } else { 7961 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7962 } 7963 7964 /* set values */ 7965 ptr_vals = recv_buffer_vals; 7966 ptr_idxs = recv_buffer_idxs_local; 7967 for (i=0;i<n_recvs;i++) { 7968 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7969 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7970 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7971 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7972 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7973 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7974 } else { 7975 /* TODO */ 7976 } 7977 ptr_idxs += olengths_idxs[i]; 7978 ptr_vals += olengths_vals[i]; 7979 } 7980 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7981 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7982 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7983 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7984 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7985 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7986 7987 #if 0 7988 if (!restrict_comm) { /* check */ 7989 Vec lvec,rvec; 7990 PetscReal infty_error; 7991 7992 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7993 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7994 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7995 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7996 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7997 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7998 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);CHKERRQ(ierr); 7999 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 8000 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 8001 } 8002 #endif 8003 8004 /* assemble new additional is (if any) */ 8005 if (nis) { 8006 PetscInt **temp_idxs,*count_is,j,psum; 8007 8008 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8009 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 8010 ptr_idxs = recv_buffer_idxs_is; 8011 psum = 0; 8012 for (i=0;i<n_recvs;i++) { 8013 for (j=0;j<nis;j++) { 8014 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8015 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8016 psum += plen; 8017 ptr_idxs += plen+1; /* shift pointer to received data */ 8018 } 8019 } 8020 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 8021 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 8022 for (i=1;i<nis;i++) { 8023 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 8024 } 8025 ierr = PetscArrayzero(count_is,nis);CHKERRQ(ierr); 8026 ptr_idxs = recv_buffer_idxs_is; 8027 for (i=0;i<n_recvs;i++) { 8028 for (j=0;j<nis;j++) { 8029 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8030 ierr = PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);CHKERRQ(ierr); 8031 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8032 ptr_idxs += plen+1; /* shift pointer to received data */ 8033 } 8034 } 8035 for (i=0;i<nis;i++) { 8036 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8037 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr); 8038 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8039 } 8040 ierr = PetscFree(count_is);CHKERRQ(ierr); 8041 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 8042 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 8043 } 8044 /* free workspace */ 8045 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 8046 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8047 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 8048 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8049 if (isdense) { 8050 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 8051 ierr = MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 8052 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 8053 } else { 8054 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 8055 } 8056 if (nis) { 8057 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8058 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 8059 } 8060 8061 if (nvecs) { 8062 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8063 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8064 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8065 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8066 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 8067 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 8068 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 8069 /* set values */ 8070 ptr_vals = recv_buffer_vecs; 8071 ptr_idxs = recv_buffer_idxs_local; 8072 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8073 for (i=0;i<n_recvs;i++) { 8074 PetscInt j; 8075 for (j=0;j<*(ptr_idxs+1);j++) { 8076 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8077 } 8078 ptr_idxs += olengths_idxs[i]; 8079 ptr_vals += olengths_idxs[i]-2; 8080 } 8081 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8082 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 8083 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 8084 } 8085 8086 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 8087 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 8088 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 8089 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 8090 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 8091 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 8092 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 8093 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 8094 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 8095 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 8096 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 8097 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 8098 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 8099 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 8100 ierr = PetscFree(onodes);CHKERRQ(ierr); 8101 if (nis) { 8102 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 8103 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 8104 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 8105 } 8106 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 8107 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 8108 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 8109 for (i=0;i<nis;i++) { 8110 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8111 } 8112 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8113 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8114 } 8115 *mat_n = NULL; 8116 } 8117 PetscFunctionReturn(0); 8118 } 8119 8120 /* temporary hack into ksp private data structure */ 8121 #include <petsc/private/kspimpl.h> 8122 8123 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8124 { 8125 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8126 PC_IS *pcis = (PC_IS*)pc->data; 8127 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8128 Mat coarsedivudotp = NULL; 8129 Mat coarseG,t_coarse_mat_is; 8130 MatNullSpace CoarseNullSpace = NULL; 8131 ISLocalToGlobalMapping coarse_islg; 8132 IS coarse_is,*isarray,corners; 8133 PetscInt i,im_active=-1,active_procs=-1; 8134 PetscInt nis,nisdofs,nisneu,nisvert; 8135 PetscInt coarse_eqs_per_proc; 8136 PC pc_temp; 8137 PCType coarse_pc_type; 8138 KSPType coarse_ksp_type; 8139 PetscBool multilevel_requested,multilevel_allowed; 8140 PetscBool coarse_reuse; 8141 PetscInt ncoarse,nedcfield; 8142 PetscBool compute_vecs = PETSC_FALSE; 8143 PetscScalar *array; 8144 MatReuse coarse_mat_reuse; 8145 PetscBool restr, full_restr, have_void; 8146 PetscMPIInt size; 8147 PetscErrorCode ierr; 8148 8149 PetscFunctionBegin; 8150 ierr = PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8151 /* Assign global numbering to coarse dofs */ 8152 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8153 PetscInt ocoarse_size; 8154 compute_vecs = PETSC_TRUE; 8155 8156 pcbddc->new_primal_space = PETSC_TRUE; 8157 ocoarse_size = pcbddc->coarse_size; 8158 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 8159 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 8160 /* see if we can avoid some work */ 8161 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8162 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8163 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8164 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 8165 coarse_reuse = PETSC_FALSE; 8166 } else { /* we can safely reuse already computed coarse matrix */ 8167 coarse_reuse = PETSC_TRUE; 8168 } 8169 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8170 coarse_reuse = PETSC_FALSE; 8171 } 8172 /* reset any subassembling information */ 8173 if (!coarse_reuse || pcbddc->recompute_topography) { 8174 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8175 } 8176 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8177 coarse_reuse = PETSC_TRUE; 8178 } 8179 if (coarse_reuse && pcbddc->coarse_ksp) { 8180 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 8181 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 8182 coarse_mat_reuse = MAT_REUSE_MATRIX; 8183 } else { 8184 coarse_mat = NULL; 8185 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8186 } 8187 8188 /* creates temporary l2gmap and IS for coarse indexes */ 8189 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 8190 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 8191 8192 /* creates temporary MATIS object for coarse matrix */ 8193 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 8194 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 8195 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 8196 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8197 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8198 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 8199 8200 /* count "active" (i.e. with positive local size) and "void" processes */ 8201 im_active = !!(pcis->n); 8202 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 8203 8204 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8205 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8206 /* full_restr : just use the receivers from the subassembling pattern */ 8207 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr); 8208 coarse_mat_is = NULL; 8209 multilevel_allowed = PETSC_FALSE; 8210 multilevel_requested = PETSC_FALSE; 8211 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8212 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8213 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8214 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8215 if (multilevel_requested) { 8216 ncoarse = active_procs/pcbddc->coarsening_ratio; 8217 restr = PETSC_FALSE; 8218 full_restr = PETSC_FALSE; 8219 } else { 8220 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8221 restr = PETSC_TRUE; 8222 full_restr = PETSC_TRUE; 8223 } 8224 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8225 ncoarse = PetscMax(1,ncoarse); 8226 if (!pcbddc->coarse_subassembling) { 8227 if (pcbddc->coarsening_ratio > 1) { 8228 if (multilevel_requested) { 8229 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8230 } else { 8231 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8232 } 8233 } else { 8234 PetscMPIInt rank; 8235 8236 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr); 8237 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8238 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8239 } 8240 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8241 PetscInt psum; 8242 if (pcbddc->coarse_ksp) psum = 1; 8243 else psum = 0; 8244 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 8245 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8246 } 8247 /* determine if we can go multilevel */ 8248 if (multilevel_requested) { 8249 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8250 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8251 } 8252 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8253 8254 /* dump subassembling pattern */ 8255 if (pcbddc->dbg_flag && multilevel_allowed) { 8256 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 8257 } 8258 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8259 nedcfield = -1; 8260 corners = NULL; 8261 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8262 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8263 const PetscInt *idxs; 8264 ISLocalToGlobalMapping tmap; 8265 8266 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8267 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 8268 /* allocate space for temporary storage */ 8269 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 8270 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 8271 /* allocate for IS array */ 8272 nisdofs = pcbddc->n_ISForDofsLocal; 8273 if (pcbddc->nedclocal) { 8274 if (pcbddc->nedfield > -1) { 8275 nedcfield = pcbddc->nedfield; 8276 } else { 8277 nedcfield = 0; 8278 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 8279 nisdofs = 1; 8280 } 8281 } 8282 nisneu = !!pcbddc->NeumannBoundariesLocal; 8283 nisvert = 0; /* nisvert is not used */ 8284 nis = nisdofs + nisneu + nisvert; 8285 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 8286 /* dofs splitting */ 8287 for (i=0;i<nisdofs;i++) { 8288 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 8289 if (nedcfield != i) { 8290 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 8291 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8292 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8293 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8294 } else { 8295 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 8296 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8297 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8298 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 8299 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8300 } 8301 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8302 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8303 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 8304 } 8305 /* neumann boundaries */ 8306 if (pcbddc->NeumannBoundariesLocal) { 8307 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 8308 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 8309 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8310 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8311 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8312 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8313 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 8314 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 8315 } 8316 /* coordinates */ 8317 if (pcbddc->corner_selected) { 8318 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8319 ierr = ISGetLocalSize(corners,&tsize);CHKERRQ(ierr); 8320 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8321 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8322 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout); 8323 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8324 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8325 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8326 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);CHKERRQ(ierr); 8327 } 8328 ierr = PetscFree(tidxs);CHKERRQ(ierr); 8329 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 8330 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 8331 } else { 8332 nis = 0; 8333 nisdofs = 0; 8334 nisneu = 0; 8335 nisvert = 0; 8336 isarray = NULL; 8337 } 8338 /* destroy no longer needed map */ 8339 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 8340 8341 /* subassemble */ 8342 if (multilevel_allowed) { 8343 Vec vp[1]; 8344 PetscInt nvecs = 0; 8345 PetscBool reuse,reuser; 8346 8347 if (coarse_mat) reuse = PETSC_TRUE; 8348 else reuse = PETSC_FALSE; 8349 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 8350 vp[0] = NULL; 8351 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8352 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8353 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8354 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8355 nvecs = 1; 8356 8357 if (pcbddc->divudotp) { 8358 Mat B,loc_divudotp; 8359 Vec v,p; 8360 IS dummy; 8361 PetscInt np; 8362 8363 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8364 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8365 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8366 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8367 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8368 ierr = VecSet(p,1.);CHKERRQ(ierr); 8369 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8370 ierr = VecDestroy(&p);CHKERRQ(ierr); 8371 ierr = MatDestroy(&B);CHKERRQ(ierr); 8372 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8373 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8374 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8375 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8376 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8377 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8378 ierr = VecDestroy(&v);CHKERRQ(ierr); 8379 } 8380 } 8381 if (reuser) { 8382 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8383 } else { 8384 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8385 } 8386 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8387 PetscScalar *arraym; 8388 const PetscScalar *arrayv; 8389 PetscInt nl; 8390 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8391 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8392 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8393 ierr = VecGetArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8394 ierr = PetscArraycpy(arraym,arrayv,nl);CHKERRQ(ierr); 8395 ierr = VecRestoreArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8396 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8397 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8398 } else { 8399 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8400 } 8401 } else { 8402 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8403 } 8404 if (coarse_mat_is || coarse_mat) { 8405 if (!multilevel_allowed) { 8406 ierr = MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8407 } else { 8408 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8409 if (coarse_mat_is) { 8410 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8411 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8412 coarse_mat = coarse_mat_is; 8413 } 8414 } 8415 } 8416 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8417 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8418 8419 /* create local to global scatters for coarse problem */ 8420 if (compute_vecs) { 8421 PetscInt lrows; 8422 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8423 if (coarse_mat) { 8424 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8425 } else { 8426 lrows = 0; 8427 } 8428 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8429 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8430 ierr = VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);CHKERRQ(ierr); 8431 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8432 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8433 } 8434 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8435 8436 /* set defaults for coarse KSP and PC */ 8437 if (multilevel_allowed) { 8438 coarse_ksp_type = KSPRICHARDSON; 8439 coarse_pc_type = PCBDDC; 8440 } else { 8441 coarse_ksp_type = KSPPREONLY; 8442 coarse_pc_type = PCREDUNDANT; 8443 } 8444 8445 /* print some info if requested */ 8446 if (pcbddc->dbg_flag) { 8447 if (!multilevel_allowed) { 8448 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8449 if (multilevel_requested) { 8450 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8451 } else if (pcbddc->max_levels) { 8452 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);CHKERRQ(ierr); 8453 } 8454 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8455 } 8456 } 8457 8458 /* communicate coarse discrete gradient */ 8459 coarseG = NULL; 8460 if (pcbddc->nedcG && multilevel_allowed) { 8461 MPI_Comm ccomm; 8462 if (coarse_mat) { 8463 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8464 } else { 8465 ccomm = MPI_COMM_NULL; 8466 } 8467 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8468 } 8469 8470 /* create the coarse KSP object only once with defaults */ 8471 if (coarse_mat) { 8472 PetscBool isredundant,isbddc,force,valid; 8473 PetscViewer dbg_viewer = NULL; 8474 8475 if (pcbddc->dbg_flag) { 8476 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8477 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8478 } 8479 if (!pcbddc->coarse_ksp) { 8480 char prefix[256],str_level[16]; 8481 size_t len; 8482 8483 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8484 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8485 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8486 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8487 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8488 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8489 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8490 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8491 /* TODO is this logic correct? should check for coarse_mat type */ 8492 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8493 /* prefix */ 8494 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8495 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8496 if (!pcbddc->current_level) { 8497 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8498 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8499 } else { 8500 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8501 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8502 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8503 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8504 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8505 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8506 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8507 } 8508 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8509 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8510 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8511 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8512 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8513 /* allow user customization */ 8514 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8515 /* get some info after set from options */ 8516 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8517 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8518 force = PETSC_FALSE; 8519 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8520 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8521 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8522 if (multilevel_allowed && !force && !valid) { 8523 isbddc = PETSC_TRUE; 8524 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8525 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8526 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8527 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8528 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8529 ierr = PetscObjectOptionsBegin((PetscObject)pc_temp);CHKERRQ(ierr); 8530 ierr = (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);CHKERRQ(ierr); 8531 ierr = PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);CHKERRQ(ierr); 8532 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8533 pc_temp->setfromoptionscalled++; 8534 } 8535 } 8536 } 8537 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8538 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8539 if (nisdofs) { 8540 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8541 for (i=0;i<nisdofs;i++) { 8542 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8543 } 8544 } 8545 if (nisneu) { 8546 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8547 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8548 } 8549 if (nisvert) { 8550 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8551 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8552 } 8553 if (coarseG) { 8554 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8555 } 8556 8557 /* get some info after set from options */ 8558 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8559 8560 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8561 if (isbddc && !multilevel_allowed) { 8562 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8563 } 8564 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8565 force = PETSC_FALSE; 8566 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8567 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8568 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8569 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8570 } 8571 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8572 if (isredundant) { 8573 KSP inner_ksp; 8574 PC inner_pc; 8575 8576 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8577 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8578 } 8579 8580 /* parameters which miss an API */ 8581 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8582 if (isbddc) { 8583 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8584 8585 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8586 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8587 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8588 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8589 if (pcbddc_coarse->benign_saddle_point) { 8590 Mat coarsedivudotp_is; 8591 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8592 IS row,col; 8593 const PetscInt *gidxs; 8594 PetscInt n,st,M,N; 8595 8596 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8597 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRMPI(ierr); 8598 st = st-n; 8599 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8600 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8601 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8602 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8603 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8604 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8605 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8606 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8607 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8608 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8609 ierr = ISDestroy(&row);CHKERRQ(ierr); 8610 ierr = ISDestroy(&col);CHKERRQ(ierr); 8611 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8612 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8613 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8614 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8615 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8616 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8617 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8618 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8619 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8620 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8621 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8622 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8623 } 8624 } 8625 8626 /* propagate symmetry info of coarse matrix */ 8627 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8628 if (pc->pmat->symmetric_set) { 8629 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8630 } 8631 if (pc->pmat->hermitian_set) { 8632 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8633 } 8634 if (pc->pmat->spd_set) { 8635 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8636 } 8637 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8638 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8639 } 8640 /* set operators */ 8641 ierr = MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");CHKERRQ(ierr); 8642 ierr = MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 8643 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8644 if (pcbddc->dbg_flag) { 8645 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8646 } 8647 } 8648 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8649 ierr = PetscFree(isarray);CHKERRQ(ierr); 8650 #if 0 8651 { 8652 PetscViewer viewer; 8653 char filename[256]; 8654 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8655 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8656 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8657 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8658 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8659 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8660 } 8661 #endif 8662 8663 if (corners) { 8664 Vec gv; 8665 IS is; 8666 const PetscInt *idxs; 8667 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8668 PetscScalar *coords; 8669 8670 if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8671 ierr = VecGetSize(pcbddc->coarse_vec,&N);CHKERRQ(ierr); 8672 ierr = VecGetLocalSize(pcbddc->coarse_vec,&n);CHKERRQ(ierr); 8673 ierr = VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);CHKERRQ(ierr); 8674 ierr = VecSetBlockSize(gv,cdim);CHKERRQ(ierr); 8675 ierr = VecSetSizes(gv,n*cdim,N*cdim);CHKERRQ(ierr); 8676 ierr = VecSetType(gv,VECSTANDARD);CHKERRQ(ierr); 8677 ierr = VecSetFromOptions(gv);CHKERRQ(ierr); 8678 ierr = VecSet(gv,PETSC_MAX_REAL);CHKERRQ(ierr); /* we only propagate coordinates from vertices constraints */ 8679 8680 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8681 ierr = ISGetLocalSize(is,&n);CHKERRQ(ierr); 8682 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 8683 ierr = PetscMalloc1(n*cdim,&coords);CHKERRQ(ierr); 8684 for (i=0;i<n;i++) { 8685 for (d=0;d<cdim;d++) { 8686 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8687 } 8688 } 8689 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 8690 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8691 8692 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 8693 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8694 ierr = VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);CHKERRQ(ierr); 8695 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8696 ierr = PetscFree(coords);CHKERRQ(ierr); 8697 ierr = VecAssemblyBegin(gv);CHKERRQ(ierr); 8698 ierr = VecAssemblyEnd(gv);CHKERRQ(ierr); 8699 ierr = VecGetArray(gv,&coords);CHKERRQ(ierr); 8700 if (pcbddc->coarse_ksp) { 8701 PC coarse_pc; 8702 PetscBool isbddc; 8703 8704 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 8705 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 8706 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8707 PetscReal *realcoords; 8708 8709 ierr = VecGetLocalSize(gv,&n);CHKERRQ(ierr); 8710 #if defined(PETSC_USE_COMPLEX) 8711 ierr = PetscMalloc1(n,&realcoords);CHKERRQ(ierr); 8712 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8713 #else 8714 realcoords = coords; 8715 #endif 8716 ierr = PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);CHKERRQ(ierr); 8717 #if defined(PETSC_USE_COMPLEX) 8718 ierr = PetscFree(realcoords);CHKERRQ(ierr); 8719 #endif 8720 } 8721 } 8722 ierr = VecRestoreArray(gv,&coords);CHKERRQ(ierr); 8723 ierr = VecDestroy(&gv);CHKERRQ(ierr); 8724 } 8725 ierr = ISDestroy(&corners);CHKERRQ(ierr); 8726 8727 if (pcbddc->coarse_ksp) { 8728 Vec crhs,csol; 8729 8730 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8731 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8732 if (!csol) { 8733 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8734 } 8735 if (!crhs) { 8736 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8737 } 8738 } 8739 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8740 8741 /* compute null space for coarse solver if the benign trick has been requested */ 8742 if (pcbddc->benign_null) { 8743 8744 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8745 for (i=0;i<pcbddc->benign_n;i++) { 8746 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8747 } 8748 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8749 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8750 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8751 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8752 if (coarse_mat) { 8753 Vec nullv; 8754 PetscScalar *array,*array2; 8755 PetscInt nl; 8756 8757 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8758 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8759 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8760 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8761 ierr = PetscArraycpy(array2,array,nl);CHKERRQ(ierr); 8762 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8763 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8764 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8765 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8766 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8767 } 8768 } 8769 ierr = PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8770 8771 ierr = PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8772 if (pcbddc->coarse_ksp) { 8773 PetscBool ispreonly; 8774 8775 if (CoarseNullSpace) { 8776 PetscBool isnull; 8777 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8778 if (isnull) { 8779 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8780 } 8781 /* TODO: add local nullspaces (if any) */ 8782 } 8783 /* setup coarse ksp */ 8784 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8785 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8786 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8787 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8788 KSP check_ksp; 8789 KSPType check_ksp_type; 8790 PC check_pc; 8791 Vec check_vec,coarse_vec; 8792 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8793 PetscInt its; 8794 PetscBool compute_eigs; 8795 PetscReal *eigs_r,*eigs_c; 8796 PetscInt neigs; 8797 const char *prefix; 8798 8799 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8800 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8801 ierr = PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);CHKERRQ(ierr); 8802 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 8803 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8804 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8805 /* prevent from setup unneeded object */ 8806 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8807 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8808 if (ispreonly) { 8809 check_ksp_type = KSPPREONLY; 8810 compute_eigs = PETSC_FALSE; 8811 } else { 8812 check_ksp_type = KSPGMRES; 8813 compute_eigs = PETSC_TRUE; 8814 } 8815 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8816 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8817 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8818 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8819 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8820 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8821 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8822 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8823 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8824 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8825 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8826 /* create random vec */ 8827 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8828 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8829 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8830 /* solve coarse problem */ 8831 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8832 ierr = KSPCheckSolve(check_ksp,pc,coarse_vec);CHKERRQ(ierr); 8833 /* set eigenvalue estimation if preonly has not been requested */ 8834 if (compute_eigs) { 8835 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8836 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8837 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8838 if (neigs) { 8839 lambda_max = eigs_r[neigs-1]; 8840 lambda_min = eigs_r[0]; 8841 if (pcbddc->use_coarse_estimates) { 8842 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8843 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8844 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8845 } 8846 } 8847 } 8848 } 8849 8850 /* check coarse problem residual error */ 8851 if (pcbddc->dbg_flag) { 8852 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8853 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8854 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8855 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8856 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8857 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8858 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8859 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8860 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8861 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8862 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8863 if (CoarseNullSpace) { 8864 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8865 } 8866 if (compute_eigs) { 8867 PetscReal lambda_max_s,lambda_min_s; 8868 KSPConvergedReason reason; 8869 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8870 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8871 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8872 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8873 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8874 for (i=0;i<neigs;i++) { 8875 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8876 } 8877 } 8878 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8879 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8880 } 8881 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8882 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8883 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8884 if (compute_eigs) { 8885 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8886 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8887 } 8888 } 8889 } 8890 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8891 /* print additional info */ 8892 if (pcbddc->dbg_flag) { 8893 /* waits until all processes reaches this point */ 8894 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8895 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);CHKERRQ(ierr); 8896 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8897 } 8898 8899 /* free memory */ 8900 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8901 ierr = PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8902 PetscFunctionReturn(0); 8903 } 8904 8905 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8906 { 8907 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8908 PC_IS* pcis = (PC_IS*)pc->data; 8909 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8910 IS subset,subset_mult,subset_n; 8911 PetscInt local_size,coarse_size=0; 8912 PetscInt *local_primal_indices=NULL; 8913 const PetscInt *t_local_primal_indices; 8914 PetscErrorCode ierr; 8915 8916 PetscFunctionBegin; 8917 /* Compute global number of coarse dofs */ 8918 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8919 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8920 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8921 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8922 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8923 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8924 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8925 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8926 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8927 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8928 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8929 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8930 ierr = PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);CHKERRQ(ierr); 8931 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8932 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8933 8934 /* check numbering */ 8935 if (pcbddc->dbg_flag) { 8936 PetscScalar coarsesum,*array,*array2; 8937 PetscInt i; 8938 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8939 8940 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8941 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8942 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8943 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8944 /* counter */ 8945 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8946 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8947 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8948 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8949 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8950 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8951 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8952 for (i=0;i<pcbddc->local_primal_size;i++) { 8953 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8954 } 8955 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8956 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8957 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8958 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8959 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8960 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8961 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8962 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8963 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8964 for (i=0;i<pcis->n;i++) { 8965 if (array[i] != 0.0 && array[i] != array2[i]) { 8966 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8967 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8968 set_error = PETSC_TRUE; 8969 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8971 } 8972 } 8973 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8974 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 8975 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8976 for (i=0;i<pcis->n;i++) { 8977 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8978 } 8979 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8980 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8981 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8982 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8983 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8984 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8985 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8986 PetscInt *gidxs; 8987 8988 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8989 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8990 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8991 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8993 for (i=0;i<pcbddc->local_primal_size;i++) { 8994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8995 } 8996 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8997 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8998 } 8999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9000 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9001 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 9002 } 9003 9004 /* get back data */ 9005 *coarse_size_n = coarse_size; 9006 *local_primal_indices_n = local_primal_indices; 9007 PetscFunctionReturn(0); 9008 } 9009 9010 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 9011 { 9012 IS localis_t; 9013 PetscInt i,lsize,*idxs,n; 9014 PetscScalar *vals; 9015 PetscErrorCode ierr; 9016 9017 PetscFunctionBegin; 9018 /* get indices in local ordering exploiting local to global map */ 9019 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 9020 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 9021 for (i=0;i<lsize;i++) vals[i] = 1.0; 9022 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9023 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 9024 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 9025 if (idxs) { /* multilevel guard */ 9026 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 9027 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 9028 } 9029 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 9030 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9031 ierr = PetscFree(vals);CHKERRQ(ierr); 9032 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 9033 /* now compute set in local ordering */ 9034 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9035 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9036 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9037 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 9038 for (i=0,lsize=0;i<n;i++) { 9039 if (PetscRealPart(vals[i]) > 0.5) { 9040 lsize++; 9041 } 9042 } 9043 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 9044 for (i=0,lsize=0;i<n;i++) { 9045 if (PetscRealPart(vals[i]) > 0.5) { 9046 idxs[lsize++] = i; 9047 } 9048 } 9049 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9050 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 9051 *localis = localis_t; 9052 PetscFunctionReturn(0); 9053 } 9054 9055 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9056 { 9057 PC_IS *pcis=(PC_IS*)pc->data; 9058 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9059 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9060 Mat S_j; 9061 PetscInt *used_xadj,*used_adjncy; 9062 PetscBool free_used_adj; 9063 PetscErrorCode ierr; 9064 9065 PetscFunctionBegin; 9066 ierr = PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9067 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9068 free_used_adj = PETSC_FALSE; 9069 if (pcbddc->sub_schurs_layers == -1) { 9070 used_xadj = NULL; 9071 used_adjncy = NULL; 9072 } else { 9073 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9074 used_xadj = pcbddc->mat_graph->xadj; 9075 used_adjncy = pcbddc->mat_graph->adjncy; 9076 } else if (pcbddc->computed_rowadj) { 9077 used_xadj = pcbddc->mat_graph->xadj; 9078 used_adjncy = pcbddc->mat_graph->adjncy; 9079 } else { 9080 PetscBool flg_row=PETSC_FALSE; 9081 const PetscInt *xadj,*adjncy; 9082 PetscInt nvtxs; 9083 9084 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9085 if (flg_row) { 9086 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 9087 ierr = PetscArraycpy(used_xadj,xadj,nvtxs+1);CHKERRQ(ierr); 9088 ierr = PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);CHKERRQ(ierr); 9089 free_used_adj = PETSC_TRUE; 9090 } else { 9091 pcbddc->sub_schurs_layers = -1; 9092 used_xadj = NULL; 9093 used_adjncy = NULL; 9094 } 9095 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9096 } 9097 } 9098 9099 /* setup sub_schurs data */ 9100 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9101 if (!sub_schurs->schur_explicit) { 9102 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9103 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9104 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 9105 } else { 9106 Mat change = NULL; 9107 Vec scaling = NULL; 9108 IS change_primal = NULL, iP; 9109 PetscInt benign_n; 9110 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9111 PetscBool need_change = PETSC_FALSE; 9112 PetscBool discrete_harmonic = PETSC_FALSE; 9113 9114 if (!pcbddc->use_vertices && reuse_solvers) { 9115 PetscInt n_vertices; 9116 9117 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 9118 reuse_solvers = (PetscBool)!n_vertices; 9119 } 9120 if (!pcbddc->benign_change_explicit) { 9121 benign_n = pcbddc->benign_n; 9122 } else { 9123 benign_n = 0; 9124 } 9125 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9126 We need a global reduction to avoid possible deadlocks. 9127 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9128 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9129 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9130 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 9131 need_change = (PetscBool)(!need_change); 9132 } 9133 /* If the user defines additional constraints, we import them here. 9134 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 9135 if (need_change) { 9136 PC_IS *pcisf; 9137 PC_BDDC *pcbddcf; 9138 PC pcf; 9139 9140 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9141 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 9142 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 9143 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 9144 9145 /* hacks */ 9146 pcisf = (PC_IS*)pcf->data; 9147 pcisf->is_B_local = pcis->is_B_local; 9148 pcisf->vec1_N = pcis->vec1_N; 9149 pcisf->BtoNmap = pcis->BtoNmap; 9150 pcisf->n = pcis->n; 9151 pcisf->n_B = pcis->n_B; 9152 pcbddcf = (PC_BDDC*)pcf->data; 9153 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 9154 pcbddcf->mat_graph = pcbddc->mat_graph; 9155 pcbddcf->use_faces = PETSC_TRUE; 9156 pcbddcf->use_change_of_basis = PETSC_TRUE; 9157 pcbddcf->use_change_on_faces = PETSC_TRUE; 9158 pcbddcf->use_qr_single = PETSC_TRUE; 9159 pcbddcf->fake_change = PETSC_TRUE; 9160 9161 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 9162 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 9163 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 9164 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 9165 change = pcbddcf->ConstraintMatrix; 9166 pcbddcf->ConstraintMatrix = NULL; 9167 9168 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 9169 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 9170 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 9171 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 9172 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 9173 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 9174 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 9175 pcf->ops->destroy = NULL; 9176 pcf->ops->reset = NULL; 9177 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 9178 } 9179 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9180 9181 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 9182 if (iP) { 9183 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 9184 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 9185 ierr = PetscOptionsEnd();CHKERRQ(ierr); 9186 } 9187 if (discrete_harmonic) { 9188 Mat A; 9189 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 9190 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 9191 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 9192 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9193 ierr = MatDestroy(&A);CHKERRQ(ierr); 9194 } else { 9195 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9196 } 9197 ierr = MatDestroy(&change);CHKERRQ(ierr); 9198 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 9199 } 9200 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9201 9202 /* free adjacency */ 9203 if (free_used_adj) { 9204 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 9205 } 9206 ierr = PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9207 PetscFunctionReturn(0); 9208 } 9209 9210 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9211 { 9212 PC_IS *pcis=(PC_IS*)pc->data; 9213 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9214 PCBDDCGraph graph; 9215 PetscErrorCode ierr; 9216 9217 PetscFunctionBegin; 9218 /* attach interface graph for determining subsets */ 9219 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9220 IS verticesIS,verticescomm; 9221 PetscInt vsize,*idxs; 9222 9223 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9224 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 9225 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9226 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 9227 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9228 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9229 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 9230 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 9231 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 9232 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 9233 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 9234 } else { 9235 graph = pcbddc->mat_graph; 9236 } 9237 /* print some info */ 9238 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9239 IS vertices; 9240 PetscInt nv,nedges,nfaces; 9241 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 9242 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9243 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 9244 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9245 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 9246 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);CHKERRQ(ierr); 9247 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);CHKERRQ(ierr); 9248 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);CHKERRQ(ierr); 9249 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9250 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9251 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9252 } 9253 9254 /* sub_schurs init */ 9255 if (!pcbddc->sub_schurs) { 9256 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 9257 } 9258 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 9259 9260 /* free graph struct */ 9261 if (pcbddc->sub_schurs_rebuild) { 9262 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 9263 } 9264 PetscFunctionReturn(0); 9265 } 9266 9267 PetscErrorCode PCBDDCCheckOperator(PC pc) 9268 { 9269 PC_IS *pcis=(PC_IS*)pc->data; 9270 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9271 PetscErrorCode ierr; 9272 9273 PetscFunctionBegin; 9274 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9275 IS zerodiag = NULL; 9276 Mat S_j,B0_B=NULL; 9277 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9278 PetscScalar *p0_check,*array,*array2; 9279 PetscReal norm; 9280 PetscInt i; 9281 9282 /* B0 and B0_B */ 9283 if (zerodiag) { 9284 IS dummy; 9285 9286 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 9287 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 9288 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 9289 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 9290 } 9291 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9292 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 9293 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 9294 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9295 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9296 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9297 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9298 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 9299 /* S_j */ 9300 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9301 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9302 9303 /* mimic vector in \widetilde{W}_\Gamma */ 9304 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 9305 /* continuous in primal space */ 9306 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 9307 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9308 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9309 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9310 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 9311 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9312 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9313 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9314 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9315 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9316 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9317 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9318 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 9319 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 9320 9321 /* assemble rhs for coarse problem */ 9322 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9323 /* local with Schur */ 9324 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 9325 if (zerodiag) { 9326 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9327 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9328 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9329 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 9330 } 9331 /* sum on primal nodes the local contributions */ 9332 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9333 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9334 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9335 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9336 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9337 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9338 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9339 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 9340 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9341 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9342 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9343 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9344 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9345 /* scale primal nodes (BDDC sums contibutions) */ 9346 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 9347 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9348 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9349 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9350 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9351 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9352 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9353 /* global: \widetilde{B0}_B w_\Gamma */ 9354 if (zerodiag) { 9355 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 9356 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9357 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9358 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9359 } 9360 /* BDDC */ 9361 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 9362 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 9363 9364 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 9365 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 9366 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 9367 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);CHKERRQ(ierr); 9368 for (i=0;i<pcbddc->benign_n;i++) { 9369 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));CHKERRQ(ierr); 9370 } 9371 ierr = PetscFree(p0_check);CHKERRQ(ierr); 9372 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 9373 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 9374 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 9375 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9376 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 9377 } 9378 PetscFunctionReturn(0); 9379 } 9380 9381 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9382 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9383 { 9384 Mat At; 9385 IS rows; 9386 PetscInt rst,ren; 9387 PetscErrorCode ierr; 9388 PetscLayout rmap; 9389 9390 PetscFunctionBegin; 9391 rst = ren = 0; 9392 if (ccomm != MPI_COMM_NULL) { 9393 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 9394 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 9395 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 9396 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 9397 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 9398 } 9399 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 9400 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 9401 ierr = ISDestroy(&rows);CHKERRQ(ierr); 9402 9403 if (ccomm != MPI_COMM_NULL) { 9404 Mat_MPIAIJ *a,*b; 9405 IS from,to; 9406 Vec gvec; 9407 PetscInt lsize; 9408 9409 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9410 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9411 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9412 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9413 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9414 a = (Mat_MPIAIJ*)At->data; 9415 b = (Mat_MPIAIJ*)(*B)->data; 9416 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRMPI(ierr); 9417 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRMPI(ierr); 9418 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9419 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9420 b->A = a->A; 9421 b->B = a->B; 9422 9423 b->donotstash = a->donotstash; 9424 b->roworiented = a->roworiented; 9425 b->rowindices = NULL; 9426 b->rowvalues = NULL; 9427 b->getrowactive = PETSC_FALSE; 9428 9429 (*B)->rmap = rmap; 9430 (*B)->factortype = A->factortype; 9431 (*B)->assembled = PETSC_TRUE; 9432 (*B)->insertmode = NOT_SET_VALUES; 9433 (*B)->preallocated = PETSC_TRUE; 9434 9435 if (a->colmap) { 9436 #if defined(PETSC_USE_CTABLE) 9437 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9438 #else 9439 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9440 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9441 ierr = PetscArraycpy(b->colmap,a->colmap,At->cmap->N);CHKERRQ(ierr); 9442 #endif 9443 } else b->colmap = NULL; 9444 if (a->garray) { 9445 PetscInt len; 9446 len = a->B->cmap->n; 9447 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9448 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9449 if (len) { ierr = PetscArraycpy(b->garray,a->garray,len);CHKERRQ(ierr); } 9450 } else b->garray = NULL; 9451 9452 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9453 b->lvec = a->lvec; 9454 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9455 9456 /* cannot use VecScatterCopy */ 9457 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9458 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9459 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9460 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9461 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9462 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9463 ierr = ISDestroy(&from);CHKERRQ(ierr); 9464 ierr = ISDestroy(&to);CHKERRQ(ierr); 9465 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9466 } 9467 ierr = MatDestroy(&At);CHKERRQ(ierr); 9468 PetscFunctionReturn(0); 9469 } 9470