1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 PetscErrorCode ierr; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 28 if (!nr || !nc) PetscFunctionReturn(0); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 33 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr,nc); 39 if (!rwork) { 40 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 50 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 51 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 #else 55 ierr = PetscMalloc1(5*n,&rwork2);CHKERRQ(ierr); 56 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr)); 57 ierr = PetscFree(rwork2);CHKERRQ(ierr); 58 #endif 59 ierr = PetscFPTrapPop();CHKERRQ(ierr); 60 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 61 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 62 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 63 if (!rwork) { 64 ierr = PetscFree(sing);CHKERRQ(ierr); 65 } 66 if (!work) { 67 ierr = PetscFree(uwork);CHKERRQ(ierr); 68 } 69 /* create B */ 70 if (!range) { 71 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 72 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 73 ierr = PetscArraycpy(data,U+nr*i,(nr-i)*nr);CHKERRQ(ierr); 74 } else { 75 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 76 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 77 ierr = PetscArraycpy(data,U,i*nr);CHKERRQ(ierr); 78 } 79 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 80 ierr = PetscFree(U);CHKERRQ(ierr); 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 const PetscScalar *vals; 122 PetscScalar v; 123 124 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 125 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 126 ierr = MatDenseGetArrayRead(GEd,&vals);CHKERRQ(ierr); 127 /* v = PetscAbsScalar(vals[0]) */; 128 v = 1.; 129 cvals[0] = vals[0]/v; 130 cvals[1] = vals[1]/v; 131 ierr = MatDenseRestoreArrayRead(GEd,&vals);CHKERRQ(ierr); 132 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 133 #if defined(PRINT_GDET) 134 { 135 PetscViewer viewer; 136 char filename[256]; 137 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 138 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 139 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 140 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 141 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 142 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 143 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 144 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 145 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 146 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 147 } 148 #endif 149 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 150 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 151 } 152 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode PCBDDCNedelecSupport(PC pc) 157 { 158 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 159 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 160 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 161 Vec tvec; 162 PetscSF sfv; 163 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 164 MPI_Comm comm; 165 IS lned,primals,allprimals,nedfieldlocal; 166 IS *eedges,*extrows,*extcols,*alleedges; 167 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 168 PetscScalar *vals,*work; 169 PetscReal *rwork; 170 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 171 PetscInt ne,nv,Lv,order,n,field; 172 PetscInt n_neigh,*neigh,*n_shared,**shared; 173 PetscInt i,j,extmem,cum,maxsize,nee; 174 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 175 PetscInt *sfvleaves,*sfvroots; 176 PetscInt *corners,*cedges; 177 PetscInt *ecount,**eneighs,*vcount,**vneighs; 178 PetscInt *emarks; 179 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 180 PetscErrorCode ierr; 181 182 PetscFunctionBegin; 183 /* If the discrete gradient is defined for a subset of dofs and global is true, 184 it assumes G is given in global ordering for all the dofs. 185 Otherwise, the ordering is global for the Nedelec field */ 186 order = pcbddc->nedorder; 187 conforming = pcbddc->conforming; 188 field = pcbddc->nedfield; 189 global = pcbddc->nedglobal; 190 setprimal = PETSC_FALSE; 191 print = PETSC_FALSE; 192 singular = PETSC_FALSE; 193 194 /* Command line customization */ 195 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 196 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 199 /* print debug info TODO: to be removed */ 200 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 201 ierr = PetscOptionsEnd();CHKERRQ(ierr); 202 203 /* Return if there are no edges in the decomposition and the problem is not singular */ 204 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 205 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 206 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 207 if (!singular) { 208 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 209 lrc[0] = PETSC_FALSE; 210 for (i=0;i<n;i++) { 211 if (PetscRealPart(vals[i]) > 2.) { 212 lrc[0] = PETSC_TRUE; 213 break; 214 } 215 } 216 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 217 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr); 218 if (!lrc[1]) PetscFunctionReturn(0); 219 } 220 221 /* Get Nedelec field */ 222 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 223 if (pcbddc->n_ISForDofsLocal && field >= 0) { 224 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 225 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 226 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 227 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 228 ne = n; 229 nedfieldlocal = NULL; 230 global = PETSC_TRUE; 231 } else if (field == PETSC_DECIDE) { 232 PetscInt rst,ren,*idx; 233 234 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 235 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 236 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 237 for (i=rst;i<ren;i++) { 238 PetscInt nc; 239 240 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 241 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 242 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 } 244 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 245 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 246 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 247 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 248 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 249 } else { 250 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 251 } 252 253 /* Sanity checks */ 254 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 255 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 256 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 257 258 /* Just set primal dofs and return */ 259 if (setprimal) { 260 IS enedfieldlocal; 261 PetscInt *eidxs; 262 263 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 264 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 265 if (nedfieldlocal) { 266 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 267 for (i=0,cum=0;i<ne;i++) { 268 if (PetscRealPart(vals[idxs[i]]) > 2.) { 269 eidxs[cum++] = idxs[i]; 270 } 271 } 272 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 273 } else { 274 for (i=0,cum=0;i<ne;i++) { 275 if (PetscRealPart(vals[i]) > 2.) { 276 eidxs[cum++] = i; 277 } 278 } 279 } 280 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 281 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 282 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 283 ierr = PetscFree(eidxs);CHKERRQ(ierr); 284 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 285 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 /* Compute some l2g maps */ 290 if (nedfieldlocal) { 291 IS is; 292 293 /* need to map from the local Nedelec field to local numbering */ 294 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 295 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 296 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 297 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 298 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 299 if (global) { 300 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 301 el2g = al2g; 302 } else { 303 IS gis; 304 305 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 306 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 307 ierr = ISDestroy(&gis);CHKERRQ(ierr); 308 } 309 ierr = ISDestroy(&is);CHKERRQ(ierr); 310 } else { 311 /* restore default */ 312 pcbddc->nedfield = -1; 313 /* one ref for the destruction of al2g, one for el2g */ 314 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 315 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 316 el2g = al2g; 317 fl2g = NULL; 318 } 319 320 /* Start communication to drop connections for interior edges (for cc analysis only) */ 321 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 322 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 323 if (nedfieldlocal) { 324 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 325 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 326 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 } else { 328 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 329 } 330 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 331 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 332 333 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 334 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 335 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 336 if (global) { 337 PetscInt rst; 338 339 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 340 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 341 if (matis->sf_rootdata[i] < 2) { 342 matis->sf_rootdata[cum++] = i + rst; 343 } 344 } 345 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 346 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 347 } else { 348 PetscInt *tbz; 349 350 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 351 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 352 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 353 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 354 for (i=0,cum=0;i<ne;i++) 355 if (matis->sf_leafdata[idxs[i]] == 1) 356 tbz[cum++] = i; 357 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 358 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 359 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 360 ierr = PetscFree(tbz);CHKERRQ(ierr); 361 } 362 } else { /* we need the entire G to infer the nullspace */ 363 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 364 G = pcbddc->discretegradient; 365 } 366 367 /* Extract subdomain relevant rows of G */ 368 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 369 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 370 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 371 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 372 ierr = ISDestroy(&lned);CHKERRQ(ierr); 373 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 374 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 375 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 376 377 /* SF for nodal dofs communications */ 378 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 379 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 380 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 382 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 384 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 386 i = singular ? 2 : 1; 387 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 388 389 /* Destroy temporary G created in MATIS format and modified G */ 390 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 391 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 392 ierr = MatDestroy(&G);CHKERRQ(ierr); 393 394 if (print) { 395 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 396 ierr = MatView(lG,NULL);CHKERRQ(ierr); 397 } 398 399 /* Save lG for values insertion in change of basis */ 400 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 401 402 /* Analyze the edge-nodes connections (duplicate lG) */ 403 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 404 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 405 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 409 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 410 /* need to import the boundary specification to ensure the 411 proper detection of coarse edges' endpoints */ 412 if (pcbddc->DirichletBoundariesLocal) { 413 IS is; 414 415 if (fl2g) { 416 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 417 } else { 418 is = pcbddc->DirichletBoundariesLocal; 419 } 420 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 421 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 422 for (i=0;i<cum;i++) { 423 if (idxs[i] >= 0) { 424 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 425 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 426 } 427 } 428 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 429 if (fl2g) { 430 ierr = ISDestroy(&is);CHKERRQ(ierr); 431 } 432 } 433 if (pcbddc->NeumannBoundariesLocal) { 434 IS is; 435 436 if (fl2g) { 437 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 438 } else { 439 is = pcbddc->NeumannBoundariesLocal; 440 } 441 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 442 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 443 for (i=0;i<cum;i++) { 444 if (idxs[i] >= 0) { 445 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 446 } 447 } 448 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 449 if (fl2g) { 450 ierr = ISDestroy(&is);CHKERRQ(ierr); 451 } 452 } 453 454 /* Count neighs per dof */ 455 ierr = ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 456 ierr = ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 457 458 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 459 for proper detection of coarse edges' endpoints */ 460 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 461 for (i=0;i<ne;i++) { 462 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 463 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 467 if (!conforming) { 468 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 469 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 470 } 471 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 472 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 473 cum = 0; 474 for (i=0;i<ne;i++) { 475 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 476 if (!PetscBTLookup(btee,i)) { 477 marks[cum++] = i; 478 continue; 479 } 480 /* set badly connected edge dofs as primal */ 481 if (!conforming) { 482 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 483 marks[cum++] = i; 484 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 485 for (j=ii[i];j<ii[i+1];j++) { 486 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 487 } 488 } else { 489 /* every edge dofs should be connected trough a certain number of nodal dofs 490 to other edge dofs belonging to coarse edges 491 - at most 2 endpoints 492 - order-1 interior nodal dofs 493 - no undefined nodal dofs (nconn < order) 494 */ 495 PetscInt ends = 0,ints = 0, undef = 0; 496 for (j=ii[i];j<ii[i+1];j++) { 497 PetscInt v = jj[j],k; 498 PetscInt nconn = iit[v+1]-iit[v]; 499 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 500 if (nconn > order) ends++; 501 else if (nconn == order) ints++; 502 else undef++; 503 } 504 if (undef || ends > 2 || ints != order -1) { 505 marks[cum++] = i; 506 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 507 for (j=ii[i];j<ii[i+1];j++) { 508 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 509 } 510 } 511 } 512 } 513 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 514 if (!order && ii[i+1] != ii[i]) { 515 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 516 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 517 } 518 } 519 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 520 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 521 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 if (!conforming) { 523 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 524 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 525 } 526 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 527 528 /* identify splitpoints and corner candidates */ 529 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 530 if (print) { 531 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 532 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 533 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 534 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 535 } 536 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 537 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 538 for (i=0;i<nv;i++) { 539 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 540 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 541 if (!order) { /* variable order */ 542 PetscReal vorder = 0.; 543 544 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 545 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 546 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 547 ord = 1; 548 } 549 if (PetscUnlikelyDebug(test%ord)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord); 550 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 551 if (PetscBTLookup(btbd,jj[j])) { 552 bdir = PETSC_TRUE; 553 break; 554 } 555 if (vc != ecount[jj[j]]) { 556 sneighs = PETSC_FALSE; 557 } else { 558 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 559 for (k=0;k<vc;k++) { 560 if (vn[k] != en[k]) { 561 sneighs = PETSC_FALSE; 562 break; 563 } 564 } 565 } 566 } 567 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 568 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 569 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 570 } else if (test == ord) { 571 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 572 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 573 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 574 } else { 575 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 576 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 577 } 578 } 579 } 580 ierr = ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 581 ierr = ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 582 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 583 584 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 585 if (order != 1) { 586 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 587 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 if (PetscBTLookup(btvcand,i)) { 590 PetscBool found = PETSC_FALSE; 591 for (j=ii[i];j<ii[i+1] && !found;j++) { 592 PetscInt k,e = jj[j]; 593 if (PetscBTLookup(bte,e)) continue; 594 for (k=iit[e];k<iit[e+1];k++) { 595 PetscInt v = jjt[k]; 596 if (v != i && PetscBTLookup(btvcand,v)) { 597 found = PETSC_TRUE; 598 break; 599 } 600 } 601 } 602 if (!found) { 603 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 604 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 605 } else { 606 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 607 } 608 } 609 } 610 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 611 } 612 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 613 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 614 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 615 616 /* Get the local G^T explicitly */ 617 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 618 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 619 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 620 621 /* Mark interior nodal dofs */ 622 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 623 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 624 for (i=1;i<n_neigh;i++) { 625 for (j=0;j<n_shared[i];j++) { 626 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 627 } 628 } 629 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 630 631 /* communicate corners and splitpoints */ 632 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 633 ierr = PetscArrayzero(sfvleaves,nv);CHKERRQ(ierr); 634 ierr = PetscArrayzero(sfvroots,Lv);CHKERRQ(ierr); 635 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 636 637 if (print) { 638 IS tbz; 639 640 cum = 0; 641 for (i=0;i<nv;i++) 642 if (sfvleaves[i]) 643 vmarks[cum++] = i; 644 645 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 646 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 647 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 648 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 649 } 650 651 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 652 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 653 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);CHKERRQ(ierr); 654 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);CHKERRQ(ierr); 655 656 /* Zero rows of lGt corresponding to identified corners 657 and interior nodal dofs */ 658 cum = 0; 659 for (i=0;i<nv;i++) { 660 if (sfvleaves[i]) { 661 vmarks[cum++] = i; 662 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 663 } 664 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 665 } 666 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 667 if (print) { 668 IS tbz; 669 670 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 671 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 672 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 673 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 674 } 675 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 676 ierr = PetscFree(vmarks);CHKERRQ(ierr); 677 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 678 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 679 680 /* Recompute G */ 681 ierr = MatDestroy(&lG);CHKERRQ(ierr); 682 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 683 if (print) { 684 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 685 ierr = MatView(lG,NULL);CHKERRQ(ierr); 686 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 687 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 688 } 689 690 /* Get primal dofs (if any) */ 691 cum = 0; 692 for (i=0;i<ne;i++) { 693 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 694 } 695 if (fl2g) { 696 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 697 } 698 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 699 if (print) { 700 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 701 ierr = ISView(primals,NULL);CHKERRQ(ierr); 702 } 703 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 704 /* TODO: what if the user passed in some of them ? */ 705 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 706 ierr = ISDestroy(&primals);CHKERRQ(ierr); 707 708 /* Compute edge connectivity */ 709 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 710 711 /* Symbolic conn = lG*lGt */ 712 ierr = MatProductCreate(lG,lGt,NULL,&conn);CHKERRQ(ierr); 713 ierr = MatProductSetType(conn,MATPRODUCT_AB);CHKERRQ(ierr); 714 ierr = MatProductSetAlgorithm(conn,"default");CHKERRQ(ierr); 715 ierr = MatProductSetFill(conn,PETSC_DEFAULT);CHKERRQ(ierr); 716 ierr = PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_");CHKERRQ(ierr); 717 ierr = MatProductSetFromOptions(conn);CHKERRQ(ierr); 718 ierr = MatProductSymbolic(conn);CHKERRQ(ierr); 719 720 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 721 if (fl2g) { 722 PetscBT btf; 723 PetscInt *iia,*jja,*iiu,*jju; 724 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 725 726 /* create CSR for all local dofs */ 727 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 728 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 729 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 730 iiu = pcbddc->mat_graph->xadj; 731 jju = pcbddc->mat_graph->adjncy; 732 } else if (pcbddc->use_local_adj) { 733 rest = PETSC_TRUE; 734 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 735 } else { 736 free = PETSC_TRUE; 737 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 738 iiu[0] = 0; 739 for (i=0;i<n;i++) { 740 iiu[i+1] = i+1; 741 jju[i] = -1; 742 } 743 } 744 745 /* import sizes of CSR */ 746 iia[0] = 0; 747 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 748 749 /* overwrite entries corresponding to the Nedelec field */ 750 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 751 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 752 for (i=0;i<ne;i++) { 753 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 754 iia[idxs[i]+1] = ii[i+1]-ii[i]; 755 } 756 757 /* iia in CSR */ 758 for (i=0;i<n;i++) iia[i+1] += iia[i]; 759 760 /* jja in CSR */ 761 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 762 for (i=0;i<n;i++) 763 if (!PetscBTLookup(btf,i)) 764 for (j=0;j<iiu[i+1]-iiu[i];j++) 765 jja[iia[i]+j] = jju[iiu[i]+j]; 766 767 /* map edge dofs connectivity */ 768 if (jj) { 769 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 770 for (i=0;i<ne;i++) { 771 PetscInt e = idxs[i]; 772 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 773 } 774 } 775 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 776 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 777 if (rest) { 778 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 779 } 780 if (free) { 781 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 782 } 783 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 784 } else { 785 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 786 } 787 788 /* Analyze interface for edge dofs */ 789 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 790 pcbddc->mat_graph->twodim = PETSC_FALSE; 791 792 /* Get coarse edges in the edge space */ 793 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 794 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 795 796 if (fl2g) { 797 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 798 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 799 for (i=0;i<nee;i++) { 800 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 801 } 802 } else { 803 eedges = alleedges; 804 primals = allprimals; 805 } 806 807 /* Mark fine edge dofs with their coarse edge id */ 808 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 809 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 810 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 811 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 812 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 813 if (print) { 814 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 815 ierr = ISView(primals,NULL);CHKERRQ(ierr); 816 } 817 818 maxsize = 0; 819 for (i=0;i<nee;i++) { 820 PetscInt size,mark = i+1; 821 822 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 823 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 824 for (j=0;j<size;j++) marks[idxs[j]] = mark; 825 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 826 maxsize = PetscMax(maxsize,size); 827 } 828 829 /* Find coarse edge endpoints */ 830 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 831 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 832 for (i=0;i<nee;i++) { 833 PetscInt mark = i+1,size; 834 835 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 836 if (!size && nedfieldlocal) continue; 837 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 838 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 839 if (print) { 840 ierr = PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);CHKERRQ(ierr); 841 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 842 } 843 for (j=0;j<size;j++) { 844 PetscInt k, ee = idxs[j]; 845 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 846 for (k=ii[ee];k<ii[ee+1];k++) { 847 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 848 if (PetscBTLookup(btv,jj[k])) { 849 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 850 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 851 PetscInt k2; 852 PetscBool corner = PETSC_FALSE; 853 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 854 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 855 /* it's a corner if either is connected with an edge dof belonging to a different cc or 856 if the edge dof lie on the natural part of the boundary */ 857 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 858 corner = PETSC_TRUE; 859 break; 860 } 861 } 862 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 863 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 864 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 865 } else { 866 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 867 } 868 } 869 } 870 } 871 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 872 } 873 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 874 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 875 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 876 877 /* Reset marked primal dofs */ 878 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 879 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 880 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 881 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 882 883 /* Now use the initial lG */ 884 ierr = MatDestroy(&lG);CHKERRQ(ierr); 885 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 886 lG = lGinit; 887 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 888 889 /* Compute extended cols indices */ 890 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 891 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 892 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 893 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 894 i *= maxsize; 895 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 896 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 897 eerr = PETSC_FALSE; 898 for (i=0;i<nee;i++) { 899 PetscInt size,found = 0; 900 901 cum = 0; 902 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 903 if (!size && nedfieldlocal) continue; 904 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 905 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 906 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 907 for (j=0;j<size;j++) { 908 PetscInt k,ee = idxs[j]; 909 for (k=ii[ee];k<ii[ee+1];k++) { 910 PetscInt vv = jj[k]; 911 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 912 else if (!PetscBTLookupSet(btvc,vv)) found++; 913 } 914 } 915 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 916 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 917 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 918 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 919 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 920 /* it may happen that endpoints are not defined at this point 921 if it is the case, mark this edge for a second pass */ 922 if (cum != size -1 || found != 2) { 923 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 924 if (print) { 925 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 926 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 927 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 928 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 929 } 930 eerr = PETSC_TRUE; 931 } 932 } 933 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 934 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr); 935 if (done) { 936 PetscInt *newprimals; 937 938 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 939 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 940 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 941 ierr = PetscArraycpy(newprimals,idxs,cum);CHKERRQ(ierr); 942 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 944 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 945 for (i=0;i<nee;i++) { 946 PetscBool has_candidates = PETSC_FALSE; 947 if (PetscBTLookup(bter,i)) { 948 PetscInt size,mark = i+1; 949 950 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 951 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 952 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 953 for (j=0;j<size;j++) { 954 PetscInt k,ee = idxs[j]; 955 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 956 for (k=ii[ee];k<ii[ee+1];k++) { 957 /* set all candidates located on the edge as corners */ 958 if (PetscBTLookup(btvcand,jj[k])) { 959 PetscInt k2,vv = jj[k]; 960 has_candidates = PETSC_TRUE; 961 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 962 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 963 /* set all edge dofs connected to candidate as primals */ 964 for (k2=iit[vv];k2<iit[vv+1];k2++) { 965 if (marks[jjt[k2]] == mark) { 966 PetscInt k3,ee2 = jjt[k2]; 967 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 968 newprimals[cum++] = ee2; 969 /* finally set the new corners */ 970 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 971 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 972 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 973 } 974 } 975 } 976 } else { 977 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 978 } 979 } 980 } 981 if (!has_candidates) { /* circular edge */ 982 PetscInt k, ee = idxs[0],*tmarks; 983 984 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 985 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 986 for (k=ii[ee];k<ii[ee+1];k++) { 987 PetscInt k2; 988 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 989 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 990 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 991 } 992 for (j=0;j<size;j++) { 993 if (tmarks[idxs[j]] > 1) { 994 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 995 newprimals[cum++] = idxs[j]; 996 } 997 } 998 ierr = PetscFree(tmarks);CHKERRQ(ierr); 999 } 1000 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1001 } 1002 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1003 } 1004 ierr = PetscFree(extcols);CHKERRQ(ierr); 1005 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1006 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1007 if (fl2g) { 1008 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1009 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1010 for (i=0;i<nee;i++) { 1011 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1012 } 1013 ierr = PetscFree(eedges);CHKERRQ(ierr); 1014 } 1015 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1016 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1017 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1018 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1019 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1020 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1021 pcbddc->mat_graph->twodim = PETSC_FALSE; 1022 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1023 if (fl2g) { 1024 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1025 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1026 for (i=0;i<nee;i++) { 1027 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1028 } 1029 } else { 1030 eedges = alleedges; 1031 primals = allprimals; 1032 } 1033 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1034 1035 /* Mark again */ 1036 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 1037 for (i=0;i<nee;i++) { 1038 PetscInt size,mark = i+1; 1039 1040 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1041 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1042 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1043 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1044 } 1045 if (print) { 1046 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1047 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1048 } 1049 1050 /* Recompute extended cols */ 1051 eerr = PETSC_FALSE; 1052 for (i=0;i<nee;i++) { 1053 PetscInt size; 1054 1055 cum = 0; 1056 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1057 if (!size && nedfieldlocal) continue; 1058 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1059 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1060 for (j=0;j<size;j++) { 1061 PetscInt k,ee = idxs[j]; 1062 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1063 } 1064 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1065 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1066 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1067 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1068 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1069 if (cum != size -1) { 1070 if (print) { 1071 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1072 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1073 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1074 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1075 } 1076 eerr = PETSC_TRUE; 1077 } 1078 } 1079 } 1080 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1081 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1082 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1083 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1084 /* an error should not occur at this point */ 1085 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1086 1087 /* Check the number of endpoints */ 1088 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1089 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1090 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1091 for (i=0;i<nee;i++) { 1092 PetscInt size, found = 0, gc[2]; 1093 1094 /* init with defaults */ 1095 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1096 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1097 if (!size && nedfieldlocal) continue; 1098 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1099 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1100 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1101 for (j=0;j<size;j++) { 1102 PetscInt k,ee = idxs[j]; 1103 for (k=ii[ee];k<ii[ee+1];k++) { 1104 PetscInt vv = jj[k]; 1105 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1106 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1107 corners[i*2+found++] = vv; 1108 } 1109 } 1110 } 1111 if (found != 2) { 1112 PetscInt e; 1113 if (fl2g) { 1114 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1115 } else { 1116 e = idxs[0]; 1117 } 1118 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1119 } 1120 1121 /* get primal dof index on this coarse edge */ 1122 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1123 if (gc[0] > gc[1]) { 1124 PetscInt swap = corners[2*i]; 1125 corners[2*i] = corners[2*i+1]; 1126 corners[2*i+1] = swap; 1127 } 1128 cedges[i] = idxs[size-1]; 1129 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1130 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1131 } 1132 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1134 1135 if (PetscDefined(USE_DEBUG)) { 1136 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1137 not interfere with neighbouring coarse edges */ 1138 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1139 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 for (i=0;i<nv;i++) { 1141 PetscInt emax = 0,eemax = 0; 1142 1143 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1144 ierr = PetscArrayzero(emarks,nee+1);CHKERRQ(ierr); 1145 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1146 for (j=1;j<nee+1;j++) { 1147 if (emax < emarks[j]) { 1148 emax = emarks[j]; 1149 eemax = j; 1150 } 1151 } 1152 /* not relevant for edges */ 1153 if (!eemax) continue; 1154 1155 for (j=ii[i];j<ii[i+1];j++) { 1156 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1157 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1158 } 1159 } 1160 } 1161 ierr = PetscFree(emarks);CHKERRQ(ierr); 1162 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1163 } 1164 1165 /* Compute extended rows indices for edge blocks of the change of basis */ 1166 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1167 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1168 extmem *= maxsize; 1169 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1170 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1171 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1172 for (i=0;i<nv;i++) { 1173 PetscInt mark = 0,size,start; 1174 1175 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1176 for (j=ii[i];j<ii[i+1];j++) 1177 if (marks[jj[j]] && !mark) 1178 mark = marks[jj[j]]; 1179 1180 /* not relevant */ 1181 if (!mark) continue; 1182 1183 /* import extended row */ 1184 mark--; 1185 start = mark*extmem+extrowcum[mark]; 1186 size = ii[i+1]-ii[i]; 1187 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1188 ierr = PetscArraycpy(extrow+start,jj+ii[i],size);CHKERRQ(ierr); 1189 extrowcum[mark] += size; 1190 } 1191 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1192 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1193 ierr = PetscFree(marks);CHKERRQ(ierr); 1194 1195 /* Compress extrows */ 1196 cum = 0; 1197 for (i=0;i<nee;i++) { 1198 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1199 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1200 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1201 cum = PetscMax(cum,size); 1202 } 1203 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1204 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1205 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1206 1207 /* Workspace for lapack inner calls and VecSetValues */ 1208 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1209 1210 /* Create change of basis matrix (preallocation can be improved) */ 1211 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1212 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1213 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1214 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1215 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1216 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1217 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1218 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1219 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1220 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1221 1222 /* Defaults to identity */ 1223 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1224 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1225 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1226 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1227 1228 /* Create discrete gradient for the coarser level if needed */ 1229 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1230 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1231 if (pcbddc->current_level < pcbddc->max_levels) { 1232 ISLocalToGlobalMapping cel2g,cvl2g; 1233 IS wis,gwis; 1234 PetscInt cnv,cne; 1235 1236 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1237 if (fl2g) { 1238 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1239 } else { 1240 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1241 pcbddc->nedclocal = wis; 1242 } 1243 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1244 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1245 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1246 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1247 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1248 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1249 1250 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1251 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1252 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1253 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1254 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1255 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1256 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1257 1258 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1259 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1260 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1261 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1262 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1263 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1264 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1265 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1266 } 1267 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1268 1269 #if defined(PRINT_GDET) 1270 inc = 0; 1271 lev = pcbddc->current_level; 1272 #endif 1273 1274 /* Insert values in the change of basis matrix */ 1275 for (i=0;i<nee;i++) { 1276 Mat Gins = NULL, GKins = NULL; 1277 IS cornersis = NULL; 1278 PetscScalar cvals[2]; 1279 1280 if (pcbddc->nedcG) { 1281 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1282 } 1283 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1284 if (Gins && GKins) { 1285 const PetscScalar *data; 1286 const PetscInt *rows,*cols; 1287 PetscInt nrh,nch,nrc,ncc; 1288 1289 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1290 /* H1 */ 1291 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1292 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1293 ierr = MatDenseGetArrayRead(Gins,&data);CHKERRQ(ierr); 1294 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1295 ierr = MatDenseRestoreArrayRead(Gins,&data);CHKERRQ(ierr); 1296 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1297 /* complement */ 1298 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1299 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1300 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1301 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1302 ierr = MatDenseGetArrayRead(GKins,&data);CHKERRQ(ierr); 1303 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1304 ierr = MatDenseRestoreArrayRead(GKins,&data);CHKERRQ(ierr); 1305 1306 /* coarse discrete gradient */ 1307 if (pcbddc->nedcG) { 1308 PetscInt cols[2]; 1309 1310 cols[0] = 2*i; 1311 cols[1] = 2*i+1; 1312 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1313 } 1314 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1315 } 1316 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1317 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1318 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1319 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1320 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1321 } 1322 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1323 1324 /* Start assembling */ 1325 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1326 if (pcbddc->nedcG) { 1327 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1328 } 1329 1330 /* Free */ 1331 if (fl2g) { 1332 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1333 for (i=0;i<nee;i++) { 1334 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1335 } 1336 ierr = PetscFree(eedges);CHKERRQ(ierr); 1337 } 1338 1339 /* hack mat_graph with primal dofs on the coarse edges */ 1340 { 1341 PCBDDCGraph graph = pcbddc->mat_graph; 1342 PetscInt *oqueue = graph->queue; 1343 PetscInt *ocptr = graph->cptr; 1344 PetscInt ncc,*idxs; 1345 1346 /* find first primal edge */ 1347 if (pcbddc->nedclocal) { 1348 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1349 } else { 1350 if (fl2g) { 1351 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1352 } 1353 idxs = cedges; 1354 } 1355 cum = 0; 1356 while (cum < nee && cedges[cum] < 0) cum++; 1357 1358 /* adapt connected components */ 1359 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1360 graph->cptr[0] = 0; 1361 for (i=0,ncc=0;i<graph->ncc;i++) { 1362 PetscInt lc = ocptr[i+1]-ocptr[i]; 1363 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1364 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1365 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1366 ncc++; 1367 lc--; 1368 cum++; 1369 while (cum < nee && cedges[cum] < 0) cum++; 1370 } 1371 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1372 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1373 ncc++; 1374 } 1375 graph->ncc = ncc; 1376 if (pcbddc->nedclocal) { 1377 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1378 } 1379 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1380 } 1381 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1382 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1384 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1385 1386 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1387 ierr = PetscFree(extrow);CHKERRQ(ierr); 1388 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1389 ierr = PetscFree(corners);CHKERRQ(ierr); 1390 ierr = PetscFree(cedges);CHKERRQ(ierr); 1391 ierr = PetscFree(extrows);CHKERRQ(ierr); 1392 ierr = PetscFree(extcols);CHKERRQ(ierr); 1393 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1394 1395 /* Complete assembling */ 1396 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1397 if (pcbddc->nedcG) { 1398 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1399 #if 0 1400 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1401 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1402 #endif 1403 } 1404 1405 /* set change of basis */ 1406 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1407 ierr = MatDestroy(&T);CHKERRQ(ierr); 1408 1409 PetscFunctionReturn(0); 1410 } 1411 1412 /* the near-null space of BDDC carries information on quadrature weights, 1413 and these can be collinear -> so cheat with MatNullSpaceCreate 1414 and create a suitable set of basis vectors first */ 1415 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1416 { 1417 PetscErrorCode ierr; 1418 PetscInt i; 1419 1420 PetscFunctionBegin; 1421 for (i=0;i<nvecs;i++) { 1422 PetscInt first,last; 1423 1424 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1425 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1426 if (i>=first && i < last) { 1427 PetscScalar *data; 1428 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1429 if (!has_const) { 1430 data[i-first] = 1.; 1431 } else { 1432 data[2*i-first] = 1./PetscSqrtReal(2.); 1433 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1434 } 1435 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1436 } 1437 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1438 } 1439 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1440 for (i=0;i<nvecs;i++) { /* reset vectors */ 1441 PetscInt first,last; 1442 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1443 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1444 if (i>=first && i < last) { 1445 PetscScalar *data; 1446 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1447 if (!has_const) { 1448 data[i-first] = 0.; 1449 } else { 1450 data[2*i-first] = 0.; 1451 data[2*i-first+1] = 0.; 1452 } 1453 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1454 } 1455 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1456 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1457 } 1458 PetscFunctionReturn(0); 1459 } 1460 1461 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1462 { 1463 Mat loc_divudotp; 1464 Vec p,v,vins,quad_vec,*quad_vecs; 1465 ISLocalToGlobalMapping map; 1466 PetscScalar *vals; 1467 const PetscScalar *array; 1468 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1469 PetscInt n_neigh,*neigh,*n_shared,**shared; 1470 PetscMPIInt rank; 1471 PetscErrorCode ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1475 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1476 ierr = MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 1477 if (!maxneighs) { 1478 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1479 *nnsp = NULL; 1480 PetscFunctionReturn(0); 1481 } 1482 maxsize = 0; 1483 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1484 ierr = PetscMalloc2(maxsize,&gidxs,maxsize,&vals);CHKERRQ(ierr); 1485 /* create vectors to hold quadrature weights */ 1486 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1487 if (!transpose) { 1488 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1489 } else { 1490 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1491 } 1492 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1493 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1494 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1495 for (i=0;i<maxneighs;i++) { 1496 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1497 } 1498 1499 /* compute local quad vec */ 1500 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1501 if (!transpose) { 1502 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1503 } else { 1504 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1505 } 1506 ierr = VecSet(p,1.);CHKERRQ(ierr); 1507 if (!transpose) { 1508 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1509 } else { 1510 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1511 } 1512 if (vl2l) { 1513 Mat lA; 1514 VecScatter sc; 1515 1516 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1517 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1518 ierr = VecScatterCreate(v,NULL,vins,vl2l,&sc);CHKERRQ(ierr); 1519 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1522 } else { 1523 vins = v; 1524 } 1525 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1526 ierr = VecDestroy(&p);CHKERRQ(ierr); 1527 1528 /* insert in global quadrature vecs */ 1529 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 1530 for (i=1;i<n_neigh;i++) { 1531 const PetscInt *idxs; 1532 PetscInt idx,nn,j; 1533 1534 idxs = shared[i]; 1535 nn = n_shared[i]; 1536 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1537 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1538 idx = -(idx+1); 1539 if (idx < 0 || idx >= maxneighs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %D not in [0,%D)",idx,maxneighs); 1540 ierr = ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);CHKERRQ(ierr); 1541 ierr = VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1542 } 1543 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1544 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1545 if (vl2l) { 1546 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1547 } 1548 ierr = VecDestroy(&v);CHKERRQ(ierr); 1549 ierr = PetscFree2(gidxs,vals);CHKERRQ(ierr); 1550 1551 /* assemble near null space */ 1552 for (i=0;i<maxneighs;i++) { 1553 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1554 } 1555 for (i=0;i<maxneighs;i++) { 1556 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1557 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1558 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1559 } 1560 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1561 PetscFunctionReturn(0); 1562 } 1563 1564 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1565 { 1566 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1567 PetscErrorCode ierr; 1568 1569 PetscFunctionBegin; 1570 if (primalv) { 1571 if (pcbddc->user_primal_vertices_local) { 1572 IS list[2], newp; 1573 1574 list[0] = primalv; 1575 list[1] = pcbddc->user_primal_vertices_local; 1576 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1577 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1578 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1579 pcbddc->user_primal_vertices_local = newp; 1580 } else { 1581 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1582 } 1583 } 1584 PetscFunctionReturn(0); 1585 } 1586 1587 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1588 { 1589 PetscInt f, *comp = (PetscInt *)ctx; 1590 1591 PetscFunctionBegin; 1592 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1593 PetscFunctionReturn(0); 1594 } 1595 1596 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1597 { 1598 PetscErrorCode ierr; 1599 Vec local,global; 1600 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1601 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1602 PetscBool monolithic = PETSC_FALSE; 1603 1604 PetscFunctionBegin; 1605 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1606 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1607 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1608 /* need to convert from global to local topology information and remove references to information in global ordering */ 1609 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1610 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1611 ierr = VecBindToCPU(global,PETSC_TRUE);CHKERRQ(ierr); 1612 ierr = VecBindToCPU(local,PETSC_TRUE);CHKERRQ(ierr); 1613 if (monolithic) { /* just get block size to properly compute vertices */ 1614 if (pcbddc->vertex_size == 1) { 1615 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1616 } 1617 goto boundary; 1618 } 1619 1620 if (pcbddc->user_provided_isfordofs) { 1621 if (pcbddc->n_ISForDofs) { 1622 PetscInt i; 1623 1624 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1625 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1626 PetscInt bs; 1627 1628 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1629 ierr = ISGetBlockSize(pcbddc->ISForDofs[i],&bs);CHKERRQ(ierr); 1630 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1631 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1632 } 1633 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1634 pcbddc->n_ISForDofs = 0; 1635 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1636 } 1637 } else { 1638 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1639 DM dm; 1640 1641 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1642 if (!dm) { 1643 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1644 } 1645 if (dm) { 1646 IS *fields; 1647 PetscInt nf,i; 1648 1649 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1650 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1651 for (i=0;i<nf;i++) { 1652 PetscInt bs; 1653 1654 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1655 ierr = ISGetBlockSize(fields[i],&bs);CHKERRQ(ierr); 1656 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1657 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1658 } 1659 ierr = PetscFree(fields);CHKERRQ(ierr); 1660 pcbddc->n_ISForDofsLocal = nf; 1661 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1662 PetscContainer c; 1663 1664 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1665 if (c) { 1666 MatISLocalFields lf; 1667 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1668 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1669 } else { /* fallback, create the default fields if bs > 1 */ 1670 PetscInt i, n = matis->A->rmap->n; 1671 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1672 if (i > 1) { 1673 pcbddc->n_ISForDofsLocal = i; 1674 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1675 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1676 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1677 } 1678 } 1679 } 1680 } 1681 } else { 1682 PetscInt i; 1683 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1684 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1685 } 1686 } 1687 } 1688 1689 boundary: 1690 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1691 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1692 } else if (pcbddc->DirichletBoundariesLocal) { 1693 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1694 } 1695 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1696 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1697 } else if (pcbddc->NeumannBoundariesLocal) { 1698 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1699 } 1700 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1701 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1702 } 1703 ierr = VecDestroy(&global);CHKERRQ(ierr); 1704 ierr = VecDestroy(&local);CHKERRQ(ierr); 1705 /* detect local disconnected subdomains if requested (use matis->A) */ 1706 if (pcbddc->detect_disconnected) { 1707 IS primalv = NULL; 1708 PetscInt i; 1709 PetscBool filter = pcbddc->detect_disconnected_filter; 1710 1711 for (i=0;i<pcbddc->n_local_subs;i++) { 1712 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1713 } 1714 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1715 ierr = PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1716 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1717 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1718 } 1719 /* early stage corner detection */ 1720 { 1721 DM dm; 1722 1723 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1724 if (!dm) { 1725 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1726 } 1727 if (dm) { 1728 PetscBool isda; 1729 1730 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1731 if (isda) { 1732 ISLocalToGlobalMapping l2l; 1733 IS corners; 1734 Mat lA; 1735 PetscBool gl,lo; 1736 1737 { 1738 Vec cvec; 1739 const PetscScalar *coords; 1740 PetscInt dof,n,cdim; 1741 PetscBool memc = PETSC_TRUE; 1742 1743 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1744 ierr = DMGetCoordinates(dm,&cvec);CHKERRQ(ierr); 1745 ierr = VecGetLocalSize(cvec,&n);CHKERRQ(ierr); 1746 ierr = VecGetBlockSize(cvec,&cdim);CHKERRQ(ierr); 1747 n /= cdim; 1748 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 1749 ierr = PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);CHKERRQ(ierr); 1750 ierr = VecGetArrayRead(cvec,&coords);CHKERRQ(ierr); 1751 #if defined(PETSC_USE_COMPLEX) 1752 memc = PETSC_FALSE; 1753 #endif 1754 if (dof != 1) memc = PETSC_FALSE; 1755 if (memc) { 1756 ierr = PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);CHKERRQ(ierr); 1757 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1758 PetscReal *bcoords = pcbddc->mat_graph->coords; 1759 PetscInt i, b, d; 1760 1761 for (i=0;i<n;i++) { 1762 for (b=0;b<dof;b++) { 1763 for (d=0;d<cdim;d++) { 1764 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1765 } 1766 } 1767 } 1768 } 1769 ierr = VecRestoreArrayRead(cvec,&coords);CHKERRQ(ierr); 1770 pcbddc->mat_graph->cdim = cdim; 1771 pcbddc->mat_graph->cnloc = dof*n; 1772 pcbddc->mat_graph->cloc = PETSC_FALSE; 1773 } 1774 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1775 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1776 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1777 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1778 lo = (PetscBool)(l2l && corners); 1779 ierr = MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 1780 if (gl) { /* From PETSc's DMDA */ 1781 const PetscInt *idx; 1782 PetscInt dof,bs,*idxout,n; 1783 1784 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1785 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1786 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1787 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1788 if (bs == dof) { 1789 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1790 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1791 } else { /* the original DMDA local-to-local map have been modified */ 1792 PetscInt i,d; 1793 1794 ierr = PetscMalloc1(dof*n,&idxout);CHKERRQ(ierr); 1795 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1796 ierr = ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);CHKERRQ(ierr); 1797 1798 bs = 1; 1799 n *= dof; 1800 } 1801 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1802 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1803 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1804 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1805 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1806 pcbddc->corner_selected = PETSC_TRUE; 1807 pcbddc->corner_selection = PETSC_TRUE; 1808 } 1809 if (corners) { 1810 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1811 } 1812 } 1813 } 1814 } 1815 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1816 DM dm; 1817 1818 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1819 if (!dm) { 1820 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1821 } 1822 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1823 Vec vcoords; 1824 PetscSection section; 1825 PetscReal *coords; 1826 PetscInt d,cdim,nl,nf,**ctxs; 1827 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1828 1829 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1830 ierr = DMGetLocalSection(dm,§ion);CHKERRQ(ierr); 1831 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1832 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1833 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1834 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1835 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1836 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1837 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1838 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1839 for (d=0;d<cdim;d++) { 1840 PetscInt i; 1841 const PetscScalar *v; 1842 1843 for (i=0;i<nf;i++) ctxs[i][0] = d; 1844 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1845 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1846 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1847 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1848 } 1849 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1850 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1851 ierr = PetscFree(coords);CHKERRQ(ierr); 1852 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1853 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1854 } 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1860 { 1861 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1862 PetscErrorCode ierr; 1863 IS nis; 1864 const PetscInt *idxs; 1865 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1866 PetscBool *ld; 1867 1868 PetscFunctionBegin; 1869 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1870 if (mop == MPI_LAND) { 1871 /* init rootdata with true */ 1872 ld = (PetscBool*) matis->sf_rootdata; 1873 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1874 } else { 1875 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 1876 } 1877 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 1878 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1879 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1880 ld = (PetscBool*) matis->sf_leafdata; 1881 for (i=0;i<nd;i++) 1882 if (-1 < idxs[i] && idxs[i] < n) 1883 ld[idxs[i]] = PETSC_TRUE; 1884 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1885 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1886 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1887 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 1888 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 1889 if (mop == MPI_LAND) { 1890 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1891 } else { 1892 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1893 } 1894 for (i=0,nnd=0;i<n;i++) 1895 if (ld[i]) 1896 nidxs[nnd++] = i; 1897 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1898 ierr = ISDestroy(is);CHKERRQ(ierr); 1899 *is = nis; 1900 PetscFunctionReturn(0); 1901 } 1902 1903 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1904 { 1905 PC_IS *pcis = (PC_IS*)(pc->data); 1906 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1907 PetscErrorCode ierr; 1908 1909 PetscFunctionBegin; 1910 if (!pcbddc->benign_have_null) { 1911 PetscFunctionReturn(0); 1912 } 1913 if (pcbddc->ChangeOfBasisMatrix) { 1914 Vec swap; 1915 1916 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1917 swap = pcbddc->work_change; 1918 pcbddc->work_change = r; 1919 r = swap; 1920 } 1921 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1922 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1923 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);CHKERRQ(ierr); 1924 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1925 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);CHKERRQ(ierr); 1926 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 1927 ierr = VecSet(z,0.);CHKERRQ(ierr); 1928 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1929 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1930 if (pcbddc->ChangeOfBasisMatrix) { 1931 pcbddc->work_change = r; 1932 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1933 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1934 } 1935 PetscFunctionReturn(0); 1936 } 1937 1938 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1939 { 1940 PCBDDCBenignMatMult_ctx ctx; 1941 PetscErrorCode ierr; 1942 PetscBool apply_right,apply_left,reset_x; 1943 1944 PetscFunctionBegin; 1945 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1946 if (transpose) { 1947 apply_right = ctx->apply_left; 1948 apply_left = ctx->apply_right; 1949 } else { 1950 apply_right = ctx->apply_right; 1951 apply_left = ctx->apply_left; 1952 } 1953 reset_x = PETSC_FALSE; 1954 if (apply_right) { 1955 const PetscScalar *ax; 1956 PetscInt nl,i; 1957 1958 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1959 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1960 ierr = PetscArraycpy(ctx->work,ax,nl);CHKERRQ(ierr); 1961 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1962 for (i=0;i<ctx->benign_n;i++) { 1963 PetscScalar sum,val; 1964 const PetscInt *idxs; 1965 PetscInt nz,j; 1966 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1967 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1968 sum = 0.; 1969 if (ctx->apply_p0) { 1970 val = ctx->work[idxs[nz-1]]; 1971 for (j=0;j<nz-1;j++) { 1972 sum += ctx->work[idxs[j]]; 1973 ctx->work[idxs[j]] += val; 1974 } 1975 } else { 1976 for (j=0;j<nz-1;j++) { 1977 sum += ctx->work[idxs[j]]; 1978 } 1979 } 1980 ctx->work[idxs[nz-1]] -= sum; 1981 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1982 } 1983 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1984 reset_x = PETSC_TRUE; 1985 } 1986 if (transpose) { 1987 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1988 } else { 1989 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1990 } 1991 if (reset_x) { 1992 ierr = VecResetArray(x);CHKERRQ(ierr); 1993 } 1994 if (apply_left) { 1995 PetscScalar *ay; 1996 PetscInt i; 1997 1998 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1999 for (i=0;i<ctx->benign_n;i++) { 2000 PetscScalar sum,val; 2001 const PetscInt *idxs; 2002 PetscInt nz,j; 2003 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2004 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2005 val = -ay[idxs[nz-1]]; 2006 if (ctx->apply_p0) { 2007 sum = 0.; 2008 for (j=0;j<nz-1;j++) { 2009 sum += ay[idxs[j]]; 2010 ay[idxs[j]] += val; 2011 } 2012 ay[idxs[nz-1]] += sum; 2013 } else { 2014 for (j=0;j<nz-1;j++) { 2015 ay[idxs[j]] += val; 2016 } 2017 ay[idxs[nz-1]] = 0.; 2018 } 2019 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2020 } 2021 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 2022 } 2023 PetscFunctionReturn(0); 2024 } 2025 2026 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2027 { 2028 PetscErrorCode ierr; 2029 2030 PetscFunctionBegin; 2031 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2036 { 2037 PetscErrorCode ierr; 2038 2039 PetscFunctionBegin; 2040 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2045 { 2046 PC_IS *pcis = (PC_IS*)pc->data; 2047 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2048 PCBDDCBenignMatMult_ctx ctx; 2049 PetscErrorCode ierr; 2050 2051 PetscFunctionBegin; 2052 if (!restore) { 2053 Mat A_IB,A_BI; 2054 PetscScalar *work; 2055 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2056 2057 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2058 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2059 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2060 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2061 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2062 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2063 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2064 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2065 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2066 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2067 ctx->apply_left = PETSC_TRUE; 2068 ctx->apply_right = PETSC_FALSE; 2069 ctx->apply_p0 = PETSC_FALSE; 2070 ctx->benign_n = pcbddc->benign_n; 2071 if (reuse) { 2072 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2073 ctx->free = PETSC_FALSE; 2074 } else { /* TODO: could be optimized for successive solves */ 2075 ISLocalToGlobalMapping N_to_D; 2076 PetscInt i; 2077 2078 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2079 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2080 for (i=0;i<pcbddc->benign_n;i++) { 2081 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2082 } 2083 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2084 ctx->free = PETSC_TRUE; 2085 } 2086 ctx->A = pcis->A_IB; 2087 ctx->work = work; 2088 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2089 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2090 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2091 pcis->A_IB = A_IB; 2092 2093 /* A_BI as A_IB^T */ 2094 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2095 pcbddc->benign_original_mat = pcis->A_BI; 2096 pcis->A_BI = A_BI; 2097 } else { 2098 if (!pcbddc->benign_original_mat) { 2099 PetscFunctionReturn(0); 2100 } 2101 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2102 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2103 pcis->A_IB = ctx->A; 2104 ctx->A = NULL; 2105 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2106 pcis->A_BI = pcbddc->benign_original_mat; 2107 pcbddc->benign_original_mat = NULL; 2108 if (ctx->free) { 2109 PetscInt i; 2110 for (i=0;i<ctx->benign_n;i++) { 2111 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2112 } 2113 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2114 } 2115 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2116 ierr = PetscFree(ctx);CHKERRQ(ierr); 2117 } 2118 PetscFunctionReturn(0); 2119 } 2120 2121 /* used just in bddc debug mode */ 2122 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2123 { 2124 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2125 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2126 Mat An; 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2131 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2132 if (is1) { 2133 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2134 ierr = MatDestroy(&An);CHKERRQ(ierr); 2135 } else { 2136 *B = An; 2137 } 2138 PetscFunctionReturn(0); 2139 } 2140 2141 /* TODO: add reuse flag */ 2142 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2143 { 2144 Mat Bt; 2145 PetscScalar *a,*bdata; 2146 const PetscInt *ii,*ij; 2147 PetscInt m,n,i,nnz,*bii,*bij; 2148 PetscBool flg_row; 2149 PetscErrorCode ierr; 2150 2151 PetscFunctionBegin; 2152 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2153 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2154 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2155 nnz = n; 2156 for (i=0;i<ii[n];i++) { 2157 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2158 } 2159 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2160 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2161 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2162 nnz = 0; 2163 bii[0] = 0; 2164 for (i=0;i<n;i++) { 2165 PetscInt j; 2166 for (j=ii[i];j<ii[i+1];j++) { 2167 PetscScalar entry = a[j]; 2168 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2169 bij[nnz] = ij[j]; 2170 bdata[nnz] = entry; 2171 nnz++; 2172 } 2173 } 2174 bii[i+1] = nnz; 2175 } 2176 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2177 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2178 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2179 { 2180 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2181 b->free_a = PETSC_TRUE; 2182 b->free_ij = PETSC_TRUE; 2183 } 2184 if (*B == A) { 2185 ierr = MatDestroy(&A);CHKERRQ(ierr); 2186 } 2187 *B = Bt; 2188 PetscFunctionReturn(0); 2189 } 2190 2191 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2192 { 2193 Mat B = NULL; 2194 DM dm; 2195 IS is_dummy,*cc_n; 2196 ISLocalToGlobalMapping l2gmap_dummy; 2197 PCBDDCGraph graph; 2198 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2199 PetscInt i,n; 2200 PetscInt *xadj,*adjncy; 2201 PetscBool isplex = PETSC_FALSE; 2202 PetscErrorCode ierr; 2203 2204 PetscFunctionBegin; 2205 if (ncc) *ncc = 0; 2206 if (cc) *cc = NULL; 2207 if (primalv) *primalv = NULL; 2208 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2209 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2210 if (!dm) { 2211 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2212 } 2213 if (dm) { 2214 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2215 } 2216 if (filter) isplex = PETSC_FALSE; 2217 2218 if (isplex) { /* this code has been modified from plexpartition.c */ 2219 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2220 PetscInt *adj = NULL; 2221 IS cellNumbering; 2222 const PetscInt *cellNum; 2223 PetscBool useCone, useClosure; 2224 PetscSection section; 2225 PetscSegBuffer adjBuffer; 2226 PetscSF sfPoint; 2227 PetscErrorCode ierr; 2228 2229 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2230 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2231 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2232 /* Build adjacency graph via a section/segbuffer */ 2233 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2234 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2235 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2236 /* Always use FVM adjacency to create partitioner graph */ 2237 ierr = DMGetBasicAdjacency(dm, &useCone, &useClosure);CHKERRQ(ierr); 2238 ierr = DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);CHKERRQ(ierr); 2239 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2240 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2241 for (n = 0, p = pStart; p < pEnd; p++) { 2242 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2243 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2244 adjSize = PETSC_DETERMINE; 2245 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2246 for (a = 0; a < adjSize; ++a) { 2247 const PetscInt point = adj[a]; 2248 if (pStart <= point && point < pEnd) { 2249 PetscInt *PETSC_RESTRICT pBuf; 2250 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2251 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2252 *pBuf = point; 2253 } 2254 } 2255 n++; 2256 } 2257 ierr = DMSetBasicAdjacency(dm, useCone, useClosure);CHKERRQ(ierr); 2258 /* Derive CSR graph from section/segbuffer */ 2259 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2260 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2261 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2262 for (idx = 0, p = pStart; p < pEnd; p++) { 2263 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2264 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2265 } 2266 xadj[n] = size; 2267 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2268 /* Clean up */ 2269 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2270 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2271 ierr = PetscFree(adj);CHKERRQ(ierr); 2272 graph->xadj = xadj; 2273 graph->adjncy = adjncy; 2274 } else { 2275 Mat A; 2276 PetscBool isseqaij, flg_row; 2277 2278 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2279 if (!A->rmap->N || !A->cmap->N) { 2280 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2281 PetscFunctionReturn(0); 2282 } 2283 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2284 if (!isseqaij && filter) { 2285 PetscBool isseqdense; 2286 2287 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2288 if (!isseqdense) { 2289 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2290 } else { /* TODO: rectangular case and LDA */ 2291 PetscScalar *array; 2292 PetscReal chop=1.e-6; 2293 2294 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2295 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2296 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2297 for (i=0;i<n;i++) { 2298 PetscInt j; 2299 for (j=i+1;j<n;j++) { 2300 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2301 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2302 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2303 } 2304 } 2305 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2306 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2307 } 2308 } else { 2309 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2310 B = A; 2311 } 2312 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2313 2314 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2315 if (filter) { 2316 PetscScalar *data; 2317 PetscInt j,cum; 2318 2319 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2320 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2321 cum = 0; 2322 for (i=0;i<n;i++) { 2323 PetscInt t; 2324 2325 for (j=xadj[i];j<xadj[i+1];j++) { 2326 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2327 continue; 2328 } 2329 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2330 } 2331 t = xadj_filtered[i]; 2332 xadj_filtered[i] = cum; 2333 cum += t; 2334 } 2335 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2336 graph->xadj = xadj_filtered; 2337 graph->adjncy = adjncy_filtered; 2338 } else { 2339 graph->xadj = xadj; 2340 graph->adjncy = adjncy; 2341 } 2342 } 2343 /* compute local connected components using PCBDDCGraph */ 2344 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2345 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2346 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2347 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2348 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2349 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2350 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2351 2352 /* partial clean up */ 2353 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2354 if (B) { 2355 PetscBool flg_row; 2356 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2357 ierr = MatDestroy(&B);CHKERRQ(ierr); 2358 } 2359 if (isplex) { 2360 ierr = PetscFree(xadj);CHKERRQ(ierr); 2361 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2362 } 2363 2364 /* get back data */ 2365 if (isplex) { 2366 if (ncc) *ncc = graph->ncc; 2367 if (cc || primalv) { 2368 Mat A; 2369 PetscBT btv,btvt; 2370 PetscSection subSection; 2371 PetscInt *ids,cum,cump,*cids,*pids; 2372 2373 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2374 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2375 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2376 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2377 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2378 2379 cids[0] = 0; 2380 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2381 PetscInt j; 2382 2383 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2384 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2385 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2386 2387 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2388 for (k = 0; k < 2*size; k += 2) { 2389 PetscInt s, pp, p = closure[k], off, dof, cdof; 2390 2391 ierr = PetscSectionGetConstraintDof(subSection,p,&cdof);CHKERRQ(ierr); 2392 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2393 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2394 for (s = 0; s < dof-cdof; s++) { 2395 if (PetscBTLookupSet(btvt,off+s)) continue; 2396 if (!PetscBTLookup(btv,off+s)) { 2397 ids[cum++] = off+s; 2398 } else { /* cross-vertex */ 2399 pids[cump++] = off+s; 2400 } 2401 } 2402 ierr = DMPlexGetTreeParent(dm,p,&pp,NULL);CHKERRQ(ierr); 2403 if (pp != p) { 2404 ierr = PetscSectionGetConstraintDof(subSection,pp,&cdof);CHKERRQ(ierr); 2405 ierr = PetscSectionGetOffset(subSection,pp,&off);CHKERRQ(ierr); 2406 ierr = PetscSectionGetDof(subSection,pp,&dof);CHKERRQ(ierr); 2407 for (s = 0; s < dof-cdof; s++) { 2408 if (PetscBTLookupSet(btvt,off+s)) continue; 2409 if (!PetscBTLookup(btv,off+s)) { 2410 ids[cum++] = off+s; 2411 } else { /* cross-vertex */ 2412 pids[cump++] = off+s; 2413 } 2414 } 2415 } 2416 } 2417 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2418 } 2419 cids[i+1] = cum; 2420 /* mark dofs as already assigned */ 2421 for (j = cids[i]; j < cids[i+1]; j++) { 2422 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2423 } 2424 } 2425 if (cc) { 2426 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2427 for (i = 0; i < graph->ncc; i++) { 2428 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2429 } 2430 *cc = cc_n; 2431 } 2432 if (primalv) { 2433 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2434 } 2435 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2436 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2437 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2438 } 2439 } else { 2440 if (ncc) *ncc = graph->ncc; 2441 if (cc) { 2442 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2443 for (i=0;i<graph->ncc;i++) { 2444 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2445 } 2446 *cc = cc_n; 2447 } 2448 } 2449 /* clean up graph */ 2450 graph->xadj = NULL; 2451 graph->adjncy = NULL; 2452 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2457 { 2458 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2459 PC_IS* pcis = (PC_IS*)(pc->data); 2460 IS dirIS = NULL; 2461 PetscInt i; 2462 PetscErrorCode ierr; 2463 2464 PetscFunctionBegin; 2465 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2466 if (zerodiag) { 2467 Mat A; 2468 Vec vec3_N; 2469 PetscScalar *vals; 2470 const PetscInt *idxs; 2471 PetscInt nz,*count; 2472 2473 /* p0 */ 2474 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2475 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2476 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2477 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2478 for (i=0;i<nz;i++) vals[i] = 1.; 2479 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2480 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2481 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2482 /* v_I */ 2483 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2484 for (i=0;i<nz;i++) vals[i] = 0.; 2485 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2486 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2487 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2488 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2489 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2490 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2491 if (dirIS) { 2492 PetscInt n; 2493 2494 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2495 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2496 for (i=0;i<n;i++) vals[i] = 0.; 2497 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2498 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2499 } 2500 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2501 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2502 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2503 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2504 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2505 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2506 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2507 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2508 ierr = PetscFree(vals);CHKERRQ(ierr); 2509 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2510 2511 /* there should not be any pressure dofs lying on the interface */ 2512 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2513 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2514 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2515 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2516 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2517 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2518 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2519 ierr = PetscFree(count);CHKERRQ(ierr); 2520 } 2521 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2522 2523 /* check PCBDDCBenignGetOrSetP0 */ 2524 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2525 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2526 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2527 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2528 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2529 for (i=0;i<pcbddc->benign_n;i++) { 2530 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2531 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2532 } 2533 PetscFunctionReturn(0); 2534 } 2535 2536 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2537 { 2538 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2539 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2540 PetscInt nz,n,benign_n,bsp = 1; 2541 PetscInt *interior_dofs,n_interior_dofs,nneu; 2542 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2543 PetscErrorCode ierr; 2544 2545 PetscFunctionBegin; 2546 if (reuse) goto project_b0; 2547 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2548 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2549 for (n=0;n<pcbddc->benign_n;n++) { 2550 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2551 } 2552 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2553 has_null_pressures = PETSC_TRUE; 2554 have_null = PETSC_TRUE; 2555 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2556 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2557 Checks if all the pressure dofs in each subdomain have a zero diagonal 2558 If not, a change of basis on pressures is not needed 2559 since the local Schur complements are already SPD 2560 */ 2561 if (pcbddc->n_ISForDofsLocal) { 2562 IS iP = NULL; 2563 PetscInt p,*pp; 2564 PetscBool flg; 2565 2566 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);CHKERRQ(ierr); 2567 n = pcbddc->n_ISForDofsLocal; 2568 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2569 ierr = PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);CHKERRQ(ierr); 2570 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2571 if (!flg) { 2572 n = 1; 2573 pp[0] = pcbddc->n_ISForDofsLocal-1; 2574 } 2575 2576 bsp = 0; 2577 for (p=0;p<n;p++) { 2578 PetscInt bs; 2579 2580 if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]); 2581 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2582 bsp += bs; 2583 } 2584 ierr = PetscMalloc1(bsp,&bzerodiag);CHKERRQ(ierr); 2585 bsp = 0; 2586 for (p=0;p<n;p++) { 2587 const PetscInt *idxs; 2588 PetscInt b,bs,npl,*bidxs; 2589 2590 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2591 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);CHKERRQ(ierr); 2592 ierr = ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2593 ierr = PetscMalloc1(npl/bs,&bidxs);CHKERRQ(ierr); 2594 for (b=0;b<bs;b++) { 2595 PetscInt i; 2596 2597 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2598 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);CHKERRQ(ierr); 2599 bsp++; 2600 } 2601 ierr = PetscFree(bidxs);CHKERRQ(ierr); 2602 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2603 } 2604 ierr = ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);CHKERRQ(ierr); 2605 2606 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2607 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2608 if (iP) { 2609 IS newpressures; 2610 2611 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2612 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2613 pressures = newpressures; 2614 } 2615 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2616 if (!sorted) { 2617 ierr = ISSort(pressures);CHKERRQ(ierr); 2618 } 2619 ierr = PetscFree(pp);CHKERRQ(ierr); 2620 } 2621 2622 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2623 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2624 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2625 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2626 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2627 if (!sorted) { 2628 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2629 } 2630 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2631 zerodiag_save = zerodiag; 2632 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2633 if (!nz) { 2634 if (n) have_null = PETSC_FALSE; 2635 has_null_pressures = PETSC_FALSE; 2636 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2637 } 2638 recompute_zerodiag = PETSC_FALSE; 2639 2640 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2641 zerodiag_subs = NULL; 2642 benign_n = 0; 2643 n_interior_dofs = 0; 2644 interior_dofs = NULL; 2645 nneu = 0; 2646 if (pcbddc->NeumannBoundariesLocal) { 2647 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2648 } 2649 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2650 if (checkb) { /* need to compute interior nodes */ 2651 PetscInt n,i,j; 2652 PetscInt n_neigh,*neigh,*n_shared,**shared; 2653 PetscInt *iwork; 2654 2655 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2656 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2657 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2658 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2659 for (i=1;i<n_neigh;i++) 2660 for (j=0;j<n_shared[i];j++) 2661 iwork[shared[i][j]] += 1; 2662 for (i=0;i<n;i++) 2663 if (!iwork[i]) 2664 interior_dofs[n_interior_dofs++] = i; 2665 ierr = PetscFree(iwork);CHKERRQ(ierr); 2666 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2667 } 2668 if (has_null_pressures) { 2669 IS *subs; 2670 PetscInt nsubs,i,j,nl; 2671 const PetscInt *idxs; 2672 PetscScalar *array; 2673 Vec *work; 2674 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2675 2676 subs = pcbddc->local_subs; 2677 nsubs = pcbddc->n_local_subs; 2678 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2679 if (checkb) { 2680 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2681 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2682 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2683 /* work[0] = 1_p */ 2684 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2685 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2686 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2687 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2688 /* work[0] = 1_v */ 2689 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2690 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2691 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2692 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2693 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2694 } 2695 2696 if (nsubs > 1 || bsp > 1) { 2697 IS *is; 2698 PetscInt b,totb; 2699 2700 totb = bsp; 2701 is = bsp > 1 ? bzerodiag : &zerodiag; 2702 nsubs = PetscMax(nsubs,1); 2703 ierr = PetscCalloc1(nsubs*totb,&zerodiag_subs);CHKERRQ(ierr); 2704 for (b=0;b<totb;b++) { 2705 for (i=0;i<nsubs;i++) { 2706 ISLocalToGlobalMapping l2g; 2707 IS t_zerodiag_subs; 2708 PetscInt nl; 2709 2710 if (subs) { 2711 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2712 } else { 2713 IS tis; 2714 2715 ierr = MatGetLocalSize(pcbddc->local_mat,&nl,NULL);CHKERRQ(ierr); 2716 ierr = ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);CHKERRQ(ierr); 2717 ierr = ISLocalToGlobalMappingCreateIS(tis,&l2g);CHKERRQ(ierr); 2718 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2719 } 2720 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);CHKERRQ(ierr); 2721 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2722 if (nl) { 2723 PetscBool valid = PETSC_TRUE; 2724 2725 if (checkb) { 2726 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2727 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2728 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2729 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2730 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2731 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2732 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2733 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2734 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2735 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2736 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2737 for (j=0;j<n_interior_dofs;j++) { 2738 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2739 valid = PETSC_FALSE; 2740 break; 2741 } 2742 } 2743 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2744 } 2745 if (valid && nneu) { 2746 const PetscInt *idxs; 2747 PetscInt nzb; 2748 2749 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2750 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2751 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2752 if (nzb) valid = PETSC_FALSE; 2753 } 2754 if (valid && pressures) { 2755 IS t_pressure_subs,tmp; 2756 PetscInt i1,i2; 2757 2758 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2759 ierr = ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);CHKERRQ(ierr); 2760 ierr = ISGetLocalSize(tmp,&i1);CHKERRQ(ierr); 2761 ierr = ISGetLocalSize(t_zerodiag_subs,&i2);CHKERRQ(ierr); 2762 if (i2 != i1) valid = PETSC_FALSE; 2763 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2764 ierr = ISDestroy(&tmp);CHKERRQ(ierr); 2765 } 2766 if (valid) { 2767 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);CHKERRQ(ierr); 2768 benign_n++; 2769 } else recompute_zerodiag = PETSC_TRUE; 2770 } 2771 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2772 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2773 } 2774 } 2775 } else { /* there's just one subdomain (or zero if they have not been detected */ 2776 PetscBool valid = PETSC_TRUE; 2777 2778 if (nneu) valid = PETSC_FALSE; 2779 if (valid && pressures) { 2780 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2781 } 2782 if (valid && checkb) { 2783 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2784 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2785 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2786 for (j=0;j<n_interior_dofs;j++) { 2787 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2788 valid = PETSC_FALSE; 2789 break; 2790 } 2791 } 2792 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2793 } 2794 if (valid) { 2795 benign_n = 1; 2796 ierr = PetscMalloc1(benign_n,&zerodiag_subs);CHKERRQ(ierr); 2797 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2798 zerodiag_subs[0] = zerodiag; 2799 } 2800 } 2801 if (checkb) { 2802 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2803 } 2804 } 2805 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2806 2807 if (!benign_n) { 2808 PetscInt n; 2809 2810 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2811 recompute_zerodiag = PETSC_FALSE; 2812 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2813 if (n) have_null = PETSC_FALSE; 2814 } 2815 2816 /* final check for null pressures */ 2817 if (zerodiag && pressures) { 2818 ierr = ISEqual(pressures,zerodiag,&have_null);CHKERRQ(ierr); 2819 } 2820 2821 if (recompute_zerodiag) { 2822 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2823 if (benign_n == 1) { 2824 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2825 zerodiag = zerodiag_subs[0]; 2826 } else { 2827 PetscInt i,nzn,*new_idxs; 2828 2829 nzn = 0; 2830 for (i=0;i<benign_n;i++) { 2831 PetscInt ns; 2832 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2833 nzn += ns; 2834 } 2835 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2836 nzn = 0; 2837 for (i=0;i<benign_n;i++) { 2838 PetscInt ns,*idxs; 2839 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2840 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2841 ierr = PetscArraycpy(new_idxs+nzn,idxs,ns);CHKERRQ(ierr); 2842 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2843 nzn += ns; 2844 } 2845 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2846 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2847 } 2848 have_null = PETSC_FALSE; 2849 } 2850 2851 /* determines if the coarse solver will be singular or not */ 2852 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 2853 2854 /* Prepare matrix to compute no-net-flux */ 2855 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2856 Mat A,loc_divudotp; 2857 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2858 IS row,col,isused = NULL; 2859 PetscInt M,N,n,st,n_isused; 2860 2861 if (pressures) { 2862 isused = pressures; 2863 } else { 2864 isused = zerodiag_save; 2865 } 2866 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2867 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2868 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2869 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2870 n_isused = 0; 2871 if (isused) { 2872 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2873 } 2874 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 2875 st = st-n_isused; 2876 if (n) { 2877 const PetscInt *gidxs; 2878 2879 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2880 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2881 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2882 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2883 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2884 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2885 } else { 2886 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2887 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2888 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2889 } 2890 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2891 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2892 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2893 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2894 ierr = ISDestroy(&row);CHKERRQ(ierr); 2895 ierr = ISDestroy(&col);CHKERRQ(ierr); 2896 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2897 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2898 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2899 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2900 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2901 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2902 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2903 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2904 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2905 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2906 } 2907 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2908 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2909 if (bzerodiag) { 2910 PetscInt i; 2911 2912 for (i=0;i<bsp;i++) { 2913 ierr = ISDestroy(&bzerodiag[i]);CHKERRQ(ierr); 2914 } 2915 ierr = PetscFree(bzerodiag);CHKERRQ(ierr); 2916 } 2917 pcbddc->benign_n = benign_n; 2918 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2919 2920 /* determines if the problem has subdomains with 0 pressure block */ 2921 have_null = (PetscBool)(!!pcbddc->benign_n); 2922 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 2923 2924 project_b0: 2925 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2926 /* change of basis and p0 dofs */ 2927 if (pcbddc->benign_n) { 2928 PetscInt i,s,*nnz; 2929 2930 /* local change of basis for pressures */ 2931 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2932 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2933 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2934 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2935 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2936 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2937 for (i=0;i<pcbddc->benign_n;i++) { 2938 const PetscInt *idxs; 2939 PetscInt nzs,j; 2940 2941 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2942 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2943 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2944 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2945 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2946 } 2947 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2948 ierr = MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2949 ierr = PetscFree(nnz);CHKERRQ(ierr); 2950 /* set identity by default */ 2951 for (i=0;i<n;i++) { 2952 ierr = MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);CHKERRQ(ierr); 2953 } 2954 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2955 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2956 /* set change on pressures */ 2957 for (s=0;s<pcbddc->benign_n;s++) { 2958 PetscScalar *array; 2959 const PetscInt *idxs; 2960 PetscInt nzs; 2961 2962 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2963 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2964 for (i=0;i<nzs-1;i++) { 2965 PetscScalar vals[2]; 2966 PetscInt cols[2]; 2967 2968 cols[0] = idxs[i]; 2969 cols[1] = idxs[nzs-1]; 2970 vals[0] = 1.; 2971 vals[1] = 1.; 2972 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2973 } 2974 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2975 for (i=0;i<nzs-1;i++) array[i] = -1.; 2976 array[nzs-1] = 1.; 2977 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2978 /* store local idxs for p0 */ 2979 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2980 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2981 ierr = PetscFree(array);CHKERRQ(ierr); 2982 } 2983 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2984 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2985 2986 /* project if needed */ 2987 if (pcbddc->benign_change_explicit) { 2988 Mat M; 2989 2990 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2991 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2992 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2993 ierr = MatDestroy(&M);CHKERRQ(ierr); 2994 } 2995 /* store global idxs for p0 */ 2996 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2997 } 2998 *zerodiaglocal = zerodiag; 2999 PetscFunctionReturn(0); 3000 } 3001 3002 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3003 { 3004 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3005 PetscScalar *array; 3006 PetscErrorCode ierr; 3007 3008 PetscFunctionBegin; 3009 if (!pcbddc->benign_sf) { 3010 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 3011 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 3012 } 3013 if (get) { 3014 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3015 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);CHKERRQ(ierr); 3016 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);CHKERRQ(ierr); 3017 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3018 } else { 3019 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 3020 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);CHKERRQ(ierr); 3021 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);CHKERRQ(ierr); 3022 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 3023 } 3024 PetscFunctionReturn(0); 3025 } 3026 3027 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3028 { 3029 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3030 PetscErrorCode ierr; 3031 3032 PetscFunctionBegin; 3033 /* TODO: add error checking 3034 - avoid nested pop (or push) calls. 3035 - cannot push before pop. 3036 - cannot call this if pcbddc->local_mat is NULL 3037 */ 3038 if (!pcbddc->benign_n) { 3039 PetscFunctionReturn(0); 3040 } 3041 if (pop) { 3042 if (pcbddc->benign_change_explicit) { 3043 IS is_p0; 3044 MatReuse reuse; 3045 3046 /* extract B_0 */ 3047 reuse = MAT_INITIAL_MATRIX; 3048 if (pcbddc->benign_B0) { 3049 reuse = MAT_REUSE_MATRIX; 3050 } 3051 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 3052 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 3053 /* remove rows and cols from local problem */ 3054 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 3055 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3056 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 3057 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3058 } else { 3059 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3060 PetscScalar *vals; 3061 PetscInt i,n,*idxs_ins; 3062 3063 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 3064 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 3065 if (!pcbddc->benign_B0) { 3066 PetscInt *nnz; 3067 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 3068 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 3069 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3070 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 3071 for (i=0;i<pcbddc->benign_n;i++) { 3072 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 3073 nnz[i] = n - nnz[i]; 3074 } 3075 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 3076 ierr = MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3077 ierr = PetscFree(nnz);CHKERRQ(ierr); 3078 } 3079 3080 for (i=0;i<pcbddc->benign_n;i++) { 3081 PetscScalar *array; 3082 PetscInt *idxs,j,nz,cum; 3083 3084 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 3085 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3086 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3087 for (j=0;j<nz;j++) vals[j] = 1.; 3088 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3089 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 3090 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 3091 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 3092 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 3093 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3094 cum = 0; 3095 for (j=0;j<n;j++) { 3096 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3097 vals[cum] = array[j]; 3098 idxs_ins[cum] = j; 3099 cum++; 3100 } 3101 } 3102 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3103 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3104 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3105 } 3106 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3107 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3108 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3109 } 3110 } else { /* push */ 3111 if (pcbddc->benign_change_explicit) { 3112 PetscInt i; 3113 3114 for (i=0;i<pcbddc->benign_n;i++) { 3115 PetscScalar *B0_vals; 3116 PetscInt *B0_cols,B0_ncol; 3117 3118 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3119 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3120 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3121 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3122 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3123 } 3124 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3125 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3126 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3127 } 3128 PetscFunctionReturn(0); 3129 } 3130 3131 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3132 { 3133 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3134 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3135 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3136 PetscBLASInt *B_iwork,*B_ifail; 3137 PetscScalar *work,lwork; 3138 PetscScalar *St,*S,*eigv; 3139 PetscScalar *Sarray,*Starray; 3140 PetscReal *eigs,thresh,lthresh,uthresh; 3141 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3142 PetscBool allocated_S_St; 3143 #if defined(PETSC_USE_COMPLEX) 3144 PetscReal *rwork; 3145 #endif 3146 PetscErrorCode ierr; 3147 3148 PetscFunctionBegin; 3149 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3150 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3151 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3152 ierr = PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3153 3154 if (pcbddc->dbg_flag) { 3155 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3156 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3157 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3158 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3159 } 3160 3161 if (pcbddc->dbg_flag) { 3162 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);CHKERRQ(ierr); 3163 } 3164 3165 /* max size of subsets */ 3166 mss = 0; 3167 for (i=0;i<sub_schurs->n_subs;i++) { 3168 PetscInt subset_size; 3169 3170 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3171 mss = PetscMax(mss,subset_size); 3172 } 3173 3174 /* min/max and threshold */ 3175 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3176 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3177 nmax = PetscMax(nmin,nmax); 3178 allocated_S_St = PETSC_FALSE; 3179 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3180 allocated_S_St = PETSC_TRUE; 3181 } 3182 3183 /* allocate lapack workspace */ 3184 cum = cum2 = 0; 3185 maxneigs = 0; 3186 for (i=0;i<sub_schurs->n_subs;i++) { 3187 PetscInt n,subset_size; 3188 3189 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3190 n = PetscMin(subset_size,nmax); 3191 cum += subset_size; 3192 cum2 += subset_size*n; 3193 maxneigs = PetscMax(maxneigs,n); 3194 } 3195 lwork = 0; 3196 if (mss) { 3197 if (sub_schurs->is_symmetric) { 3198 PetscScalar sdummy = 0.; 3199 PetscBLASInt B_itype = 1; 3200 PetscBLASInt B_N = mss, idummy = 0; 3201 PetscReal rdummy = 0.,zero = 0.0; 3202 PetscReal eps = 0.0; /* dlamch? */ 3203 3204 B_lwork = -1; 3205 /* some implementations may complain about NULL pointers, even if we are querying */ 3206 S = &sdummy; 3207 St = &sdummy; 3208 eigs = &rdummy; 3209 eigv = &sdummy; 3210 B_iwork = &idummy; 3211 B_ifail = &idummy; 3212 #if defined(PETSC_USE_COMPLEX) 3213 rwork = &rdummy; 3214 #endif 3215 thresh = 1.0; 3216 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3217 #if defined(PETSC_USE_COMPLEX) 3218 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3219 #else 3220 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3221 #endif 3222 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3223 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3224 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3225 } 3226 3227 nv = 0; 3228 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3229 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3230 } 3231 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3232 if (allocated_S_St) { 3233 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3234 } 3235 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3236 #if defined(PETSC_USE_COMPLEX) 3237 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3238 #endif 3239 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3240 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3241 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3242 nv+cum,&pcbddc->adaptive_constraints_idxs, 3243 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3244 ierr = PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);CHKERRQ(ierr); 3245 3246 maxneigs = 0; 3247 cum = cumarray = 0; 3248 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3249 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3250 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3251 const PetscInt *idxs; 3252 3253 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3254 for (cum=0;cum<nv;cum++) { 3255 pcbddc->adaptive_constraints_n[cum] = 1; 3256 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3257 pcbddc->adaptive_constraints_data[cum] = 1.0; 3258 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3259 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3260 } 3261 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3262 } 3263 3264 if (mss) { /* multilevel */ 3265 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3266 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3267 } 3268 3269 lthresh = pcbddc->adaptive_threshold[0]; 3270 uthresh = pcbddc->adaptive_threshold[1]; 3271 for (i=0;i<sub_schurs->n_subs;i++) { 3272 const PetscInt *idxs; 3273 PetscReal upper,lower; 3274 PetscInt j,subset_size,eigs_start = 0; 3275 PetscBLASInt B_N; 3276 PetscBool same_data = PETSC_FALSE; 3277 PetscBool scal = PETSC_FALSE; 3278 3279 if (pcbddc->use_deluxe_scaling) { 3280 upper = PETSC_MAX_REAL; 3281 lower = uthresh; 3282 } else { 3283 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3284 upper = 1./uthresh; 3285 lower = 0.; 3286 } 3287 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3288 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3289 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3290 /* this is experimental: we assume the dofs have been properly grouped to have 3291 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3292 if (!sub_schurs->is_posdef) { 3293 Mat T; 3294 3295 for (j=0;j<subset_size;j++) { 3296 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3297 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3298 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3299 ierr = MatDestroy(&T);CHKERRQ(ierr); 3300 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3301 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3302 ierr = MatDestroy(&T);CHKERRQ(ierr); 3303 if (sub_schurs->change_primal_sub) { 3304 PetscInt nz,k; 3305 const PetscInt *idxs; 3306 3307 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3308 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3309 for (k=0;k<nz;k++) { 3310 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3311 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3312 } 3313 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3314 } 3315 scal = PETSC_TRUE; 3316 break; 3317 } 3318 } 3319 } 3320 3321 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3322 if (sub_schurs->is_symmetric) { 3323 PetscInt j,k; 3324 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3325 ierr = PetscArrayzero(S,subset_size*subset_size);CHKERRQ(ierr); 3326 ierr = PetscArrayzero(St,subset_size*subset_size);CHKERRQ(ierr); 3327 } 3328 for (j=0;j<subset_size;j++) { 3329 for (k=j;k<subset_size;k++) { 3330 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3331 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3332 } 3333 } 3334 } else { 3335 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3336 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3337 } 3338 } else { 3339 S = Sarray + cumarray; 3340 St = Starray + cumarray; 3341 } 3342 /* see if we can save some work */ 3343 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3344 ierr = PetscArraycmp(S,St,subset_size*subset_size,&same_data);CHKERRQ(ierr); 3345 } 3346 3347 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3348 B_neigs = 0; 3349 } else { 3350 if (sub_schurs->is_symmetric) { 3351 PetscBLASInt B_itype = 1; 3352 PetscBLASInt B_IL, B_IU; 3353 PetscReal eps = -1.0; /* dlamch? */ 3354 PetscInt nmin_s; 3355 PetscBool compute_range; 3356 3357 B_neigs = 0; 3358 compute_range = (PetscBool)!same_data; 3359 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3360 3361 if (pcbddc->dbg_flag) { 3362 PetscInt nc = 0; 3363 3364 if (sub_schurs->change_primal_sub) { 3365 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3366 } 3367 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3368 } 3369 3370 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3371 if (compute_range) { 3372 3373 /* ask for eigenvalues larger than thresh */ 3374 if (sub_schurs->is_posdef) { 3375 #if defined(PETSC_USE_COMPLEX) 3376 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3377 #else 3378 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3379 #endif 3380 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3381 } else { /* no theory so far, but it works nicely */ 3382 PetscInt recipe = 0,recipe_m = 1; 3383 PetscReal bb[2]; 3384 3385 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3386 switch (recipe) { 3387 case 0: 3388 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3389 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3390 #if defined(PETSC_USE_COMPLEX) 3391 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3392 #else 3393 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3394 #endif 3395 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3396 break; 3397 case 1: 3398 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3399 #if defined(PETSC_USE_COMPLEX) 3400 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3401 #else 3402 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3403 #endif 3404 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3405 if (!scal) { 3406 PetscBLASInt B_neigs2 = 0; 3407 3408 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3409 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3410 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3411 #if defined(PETSC_USE_COMPLEX) 3412 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3413 #else 3414 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3415 #endif 3416 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3417 B_neigs += B_neigs2; 3418 } 3419 break; 3420 case 2: 3421 if (scal) { 3422 bb[0] = PETSC_MIN_REAL; 3423 bb[1] = 0; 3424 #if defined(PETSC_USE_COMPLEX) 3425 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3426 #else 3427 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3428 #endif 3429 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3430 } else { 3431 PetscBLASInt B_neigs2 = 0; 3432 PetscBool import = PETSC_FALSE; 3433 3434 lthresh = PetscMax(lthresh,0.0); 3435 if (lthresh > 0.0) { 3436 bb[0] = PETSC_MIN_REAL; 3437 bb[1] = lthresh*lthresh; 3438 3439 import = PETSC_TRUE; 3440 #if defined(PETSC_USE_COMPLEX) 3441 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3442 #else 3443 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3444 #endif 3445 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3446 } 3447 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3448 bb[1] = PETSC_MAX_REAL; 3449 if (import) { 3450 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3451 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3452 } 3453 #if defined(PETSC_USE_COMPLEX) 3454 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3455 #else 3456 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3457 #endif 3458 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3459 B_neigs += B_neigs2; 3460 } 3461 break; 3462 case 3: 3463 if (scal) { 3464 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3465 } else { 3466 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3467 } 3468 if (!scal) { 3469 bb[0] = uthresh; 3470 bb[1] = PETSC_MAX_REAL; 3471 #if defined(PETSC_USE_COMPLEX) 3472 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3473 #else 3474 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3475 #endif 3476 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3477 } 3478 if (recipe_m > 0 && B_N - B_neigs > 0) { 3479 PetscBLASInt B_neigs2 = 0; 3480 3481 B_IL = 1; 3482 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3483 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3484 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3485 #if defined(PETSC_USE_COMPLEX) 3486 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3487 #else 3488 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3489 #endif 3490 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3491 B_neigs += B_neigs2; 3492 } 3493 break; 3494 case 4: 3495 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3496 #if defined(PETSC_USE_COMPLEX) 3497 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3498 #else 3499 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3500 #endif 3501 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3502 { 3503 PetscBLASInt B_neigs2 = 0; 3504 3505 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3506 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3507 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3508 #if defined(PETSC_USE_COMPLEX) 3509 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3510 #else 3511 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3512 #endif 3513 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3514 B_neigs += B_neigs2; 3515 } 3516 break; 3517 case 5: /* same as before: first compute all eigenvalues, then filter */ 3518 #if defined(PETSC_USE_COMPLEX) 3519 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3520 #else 3521 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3522 #endif 3523 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3524 { 3525 PetscInt e,k,ne; 3526 for (e=0,ne=0;e<B_neigs;e++) { 3527 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3528 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3529 eigs[ne] = eigs[e]; 3530 ne++; 3531 } 3532 } 3533 ierr = PetscArraycpy(eigv,S,B_N*ne);CHKERRQ(ierr); 3534 B_neigs = ne; 3535 } 3536 break; 3537 default: 3538 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3539 } 3540 } 3541 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3542 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3543 B_IL = 1; 3544 #if defined(PETSC_USE_COMPLEX) 3545 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3546 #else 3547 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3548 #endif 3549 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3550 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3551 PetscInt k; 3552 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3553 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3554 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3555 nmin = nmax; 3556 ierr = PetscArrayzero(eigv,subset_size*nmax);CHKERRQ(ierr); 3557 for (k=0;k<nmax;k++) { 3558 eigs[k] = 1./PETSC_SMALL; 3559 eigv[k*(subset_size+1)] = 1.0; 3560 } 3561 } 3562 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3563 if (B_ierr) { 3564 if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3565 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3566 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3567 } 3568 3569 if (B_neigs > nmax) { 3570 if (pcbddc->dbg_flag) { 3571 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);CHKERRQ(ierr); 3572 } 3573 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3574 B_neigs = nmax; 3575 } 3576 3577 nmin_s = PetscMin(nmin,B_N); 3578 if (B_neigs < nmin_s) { 3579 PetscBLASInt B_neigs2 = 0; 3580 3581 if (pcbddc->use_deluxe_scaling) { 3582 if (scal) { 3583 B_IU = nmin_s; 3584 B_IL = B_neigs + 1; 3585 } else { 3586 B_IL = B_N - nmin_s + 1; 3587 B_IU = B_N - B_neigs; 3588 } 3589 } else { 3590 B_IL = B_neigs + 1; 3591 B_IU = nmin_s; 3592 } 3593 if (pcbddc->dbg_flag) { 3594 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);CHKERRQ(ierr); 3595 } 3596 if (sub_schurs->is_symmetric) { 3597 PetscInt j,k; 3598 for (j=0;j<subset_size;j++) { 3599 for (k=j;k<subset_size;k++) { 3600 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3601 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3602 } 3603 } 3604 } else { 3605 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3606 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3607 } 3608 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3609 #if defined(PETSC_USE_COMPLEX) 3610 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3611 #else 3612 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3613 #endif 3614 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3615 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3616 B_neigs += B_neigs2; 3617 } 3618 if (B_ierr) { 3619 if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3620 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3621 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3622 } 3623 if (pcbddc->dbg_flag) { 3624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3625 for (j=0;j<B_neigs;j++) { 3626 if (eigs[j] == 0.0) { 3627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3628 } else { 3629 if (pcbddc->use_deluxe_scaling) { 3630 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3631 } else { 3632 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3633 } 3634 } 3635 } 3636 } 3637 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3638 } 3639 /* change the basis back to the original one */ 3640 if (sub_schurs->change) { 3641 Mat change,phi,phit; 3642 3643 if (pcbddc->dbg_flag > 2) { 3644 PetscInt ii; 3645 for (ii=0;ii<B_neigs;ii++) { 3646 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3647 for (j=0;j<B_N;j++) { 3648 #if defined(PETSC_USE_COMPLEX) 3649 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3650 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3651 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3652 #else 3653 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3654 #endif 3655 } 3656 } 3657 } 3658 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3659 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3660 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3661 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3662 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3663 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3664 } 3665 maxneigs = PetscMax(B_neigs,maxneigs); 3666 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3667 if (B_neigs) { 3668 ierr = PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);CHKERRQ(ierr); 3669 3670 if (pcbddc->dbg_flag > 1) { 3671 PetscInt ii; 3672 for (ii=0;ii<B_neigs;ii++) { 3673 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3674 for (j=0;j<B_N;j++) { 3675 #if defined(PETSC_USE_COMPLEX) 3676 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3677 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3678 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3679 #else 3680 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3681 #endif 3682 } 3683 } 3684 } 3685 ierr = PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);CHKERRQ(ierr); 3686 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3687 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3688 cum++; 3689 } 3690 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3691 /* shift for next computation */ 3692 cumarray += subset_size*subset_size; 3693 } 3694 if (pcbddc->dbg_flag) { 3695 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3696 } 3697 3698 if (mss) { 3699 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3700 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3701 /* destroy matrices (junk) */ 3702 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3703 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3704 } 3705 if (allocated_S_St) { 3706 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3707 } 3708 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3709 #if defined(PETSC_USE_COMPLEX) 3710 ierr = PetscFree(rwork);CHKERRQ(ierr); 3711 #endif 3712 if (pcbddc->dbg_flag) { 3713 PetscInt maxneigs_r; 3714 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 3715 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);CHKERRQ(ierr); 3716 } 3717 ierr = PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3718 PetscFunctionReturn(0); 3719 } 3720 3721 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3722 { 3723 PetscScalar *coarse_submat_vals; 3724 PetscErrorCode ierr; 3725 3726 PetscFunctionBegin; 3727 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3728 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3729 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3730 3731 /* Setup local neumann solver ksp_R */ 3732 /* PCBDDCSetUpLocalScatters should be called first! */ 3733 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3734 3735 /* 3736 Setup local correction and local part of coarse basis. 3737 Gives back the dense local part of the coarse matrix in column major ordering 3738 */ 3739 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3740 3741 /* Compute total number of coarse nodes and setup coarse solver */ 3742 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3743 3744 /* free */ 3745 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3746 PetscFunctionReturn(0); 3747 } 3748 3749 PetscErrorCode PCBDDCResetCustomization(PC pc) 3750 { 3751 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3752 PetscErrorCode ierr; 3753 3754 PetscFunctionBegin; 3755 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3756 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3757 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3758 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3759 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3760 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3761 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3762 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3763 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3764 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3765 PetscFunctionReturn(0); 3766 } 3767 3768 PetscErrorCode PCBDDCResetTopography(PC pc) 3769 { 3770 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3771 PetscInt i; 3772 PetscErrorCode ierr; 3773 3774 PetscFunctionBegin; 3775 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3776 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3777 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3778 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3779 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3780 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3781 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3782 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3783 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3784 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3785 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3786 for (i=0;i<pcbddc->n_local_subs;i++) { 3787 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3788 } 3789 pcbddc->n_local_subs = 0; 3790 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3791 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3792 pcbddc->graphanalyzed = PETSC_FALSE; 3793 pcbddc->recompute_topography = PETSC_TRUE; 3794 pcbddc->corner_selected = PETSC_FALSE; 3795 PetscFunctionReturn(0); 3796 } 3797 3798 PetscErrorCode PCBDDCResetSolvers(PC pc) 3799 { 3800 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3801 PetscErrorCode ierr; 3802 3803 PetscFunctionBegin; 3804 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3805 if (pcbddc->coarse_phi_B) { 3806 PetscScalar *array; 3807 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3808 ierr = PetscFree(array);CHKERRQ(ierr); 3809 } 3810 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3811 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3812 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3813 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3814 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3815 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3816 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3817 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3818 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3819 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3820 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3821 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3822 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3823 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3824 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3825 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3826 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3827 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3828 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3829 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3830 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3831 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3832 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3833 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3834 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3835 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3836 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3837 if (pcbddc->benign_zerodiag_subs) { 3838 PetscInt i; 3839 for (i=0;i<pcbddc->benign_n;i++) { 3840 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3841 } 3842 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3843 } 3844 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3845 PetscFunctionReturn(0); 3846 } 3847 3848 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3849 { 3850 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3851 PC_IS *pcis = (PC_IS*)pc->data; 3852 VecType impVecType; 3853 PetscInt n_constraints,n_R,old_size; 3854 PetscErrorCode ierr; 3855 3856 PetscFunctionBegin; 3857 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3858 n_R = pcis->n - pcbddc->n_vertices; 3859 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3860 /* local work vectors (try to avoid unneeded work)*/ 3861 /* R nodes */ 3862 old_size = -1; 3863 if (pcbddc->vec1_R) { 3864 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3865 } 3866 if (n_R != old_size) { 3867 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3868 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3869 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3870 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3871 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3872 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3873 } 3874 /* local primal dofs */ 3875 old_size = -1; 3876 if (pcbddc->vec1_P) { 3877 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3878 } 3879 if (pcbddc->local_primal_size != old_size) { 3880 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3881 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3882 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3883 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3884 } 3885 /* local explicit constraints */ 3886 old_size = -1; 3887 if (pcbddc->vec1_C) { 3888 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3889 } 3890 if (n_constraints && n_constraints != old_size) { 3891 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3892 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3893 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3894 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3895 } 3896 PetscFunctionReturn(0); 3897 } 3898 3899 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3900 { 3901 PetscErrorCode ierr; 3902 /* pointers to pcis and pcbddc */ 3903 PC_IS* pcis = (PC_IS*)pc->data; 3904 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3905 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3906 /* submatrices of local problem */ 3907 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3908 /* submatrices of local coarse problem */ 3909 Mat S_VV,S_CV,S_VC,S_CC; 3910 /* working matrices */ 3911 Mat C_CR; 3912 /* additional working stuff */ 3913 PC pc_R; 3914 Mat F,Brhs = NULL; 3915 Vec dummy_vec; 3916 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3917 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3918 PetscScalar *work; 3919 PetscInt *idx_V_B; 3920 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3921 PetscInt i,n_R,n_D,n_B; 3922 PetscScalar one=1.0,m_one=-1.0; 3923 3924 PetscFunctionBegin; 3925 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3926 ierr = PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3927 3928 /* Set Non-overlapping dimensions */ 3929 n_vertices = pcbddc->n_vertices; 3930 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3931 n_B = pcis->n_B; 3932 n_D = pcis->n - n_B; 3933 n_R = pcis->n - n_vertices; 3934 3935 /* vertices in boundary numbering */ 3936 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3937 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3938 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3939 3940 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3941 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3942 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3943 ierr = MatDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3944 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3945 ierr = MatDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3946 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3947 ierr = MatDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3949 ierr = MatDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3950 3951 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3952 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3953 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3954 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3955 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3956 lda_rhs = n_R; 3957 need_benign_correction = PETSC_FALSE; 3958 if (isLU || isCHOL) { 3959 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3960 } else if (sub_schurs && sub_schurs->reuse_solver) { 3961 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3962 MatFactorType type; 3963 3964 F = reuse_solver->F; 3965 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3966 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3967 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3968 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3969 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3970 } else F = NULL; 3971 3972 /* determine if we can use a sparse right-hand side */ 3973 sparserhs = PETSC_FALSE; 3974 if (F) { 3975 MatSolverType solver; 3976 3977 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3978 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3979 } 3980 3981 /* allocate workspace */ 3982 n = 0; 3983 if (n_constraints) { 3984 n += lda_rhs*n_constraints; 3985 } 3986 if (n_vertices) { 3987 n = PetscMax(2*lda_rhs*n_vertices,n); 3988 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3989 } 3990 if (!pcbddc->symmetric_primal) { 3991 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3992 } 3993 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3994 3995 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3996 dummy_vec = NULL; 3997 if (need_benign_correction && lda_rhs != n_R && F) { 3998 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);CHKERRQ(ierr); 3999 ierr = VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);CHKERRQ(ierr); 4000 ierr = VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);CHKERRQ(ierr); 4001 } 4002 4003 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 4004 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 4005 4006 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4007 if (n_constraints) { 4008 Mat M3,C_B; 4009 IS is_aux; 4010 PetscScalar *array,*array2; 4011 4012 /* Extract constraints on R nodes: C_{CR} */ 4013 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 4014 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 4015 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4016 4017 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4018 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4019 if (!sparserhs) { 4020 ierr = PetscArrayzero(work,lda_rhs*n_constraints);CHKERRQ(ierr); 4021 for (i=0;i<n_constraints;i++) { 4022 const PetscScalar *row_cmat_values; 4023 const PetscInt *row_cmat_indices; 4024 PetscInt size_of_constraint,j; 4025 4026 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4027 for (j=0;j<size_of_constraint;j++) { 4028 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4029 } 4030 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4031 } 4032 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 4033 } else { 4034 Mat tC_CR; 4035 4036 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4037 if (lda_rhs != n_R) { 4038 PetscScalar *aa; 4039 PetscInt r,*ii,*jj; 4040 PetscBool done; 4041 4042 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4043 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4044 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 4045 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 4046 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4047 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4048 } else { 4049 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 4050 tC_CR = C_CR; 4051 } 4052 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 4053 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 4054 } 4055 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 4056 if (F) { 4057 if (need_benign_correction) { 4058 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4059 4060 /* rhs is already zero on interior dofs, no need to change the rhs */ 4061 ierr = PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);CHKERRQ(ierr); 4062 } 4063 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 4064 if (need_benign_correction) { 4065 PetscScalar *marr; 4066 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4067 4068 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4069 if (lda_rhs != n_R) { 4070 for (i=0;i<n_constraints;i++) { 4071 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4072 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4073 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4074 } 4075 } else { 4076 for (i=0;i<n_constraints;i++) { 4077 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4078 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4079 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4080 } 4081 } 4082 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4083 } 4084 } else { 4085 PetscScalar *marr; 4086 4087 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4088 for (i=0;i<n_constraints;i++) { 4089 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4090 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 4091 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4092 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4093 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4094 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4095 } 4096 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4097 } 4098 if (sparserhs) { 4099 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4100 } 4101 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4102 if (!pcbddc->switch_static) { 4103 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4104 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4105 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4106 for (i=0;i<n_constraints;i++) { 4107 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 4108 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 4109 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4110 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4111 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4112 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4113 } 4114 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4115 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4116 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4117 } else { 4118 if (lda_rhs != n_R) { 4119 IS dummy; 4120 4121 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 4122 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4123 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 4124 } else { 4125 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 4126 pcbddc->local_auxmat2 = local_auxmat2_R; 4127 } 4128 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4129 } 4130 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4131 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4132 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4133 if (isCHOL) { 4134 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4135 } else { 4136 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4137 } 4138 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4139 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4140 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4141 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4142 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4143 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4144 } 4145 4146 /* Get submatrices from subdomain matrix */ 4147 if (n_vertices) { 4148 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4149 PetscBool oldpin; 4150 #endif 4151 PetscBool isaij; 4152 IS is_aux; 4153 4154 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4155 IS tis; 4156 4157 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4158 ierr = ISSort(tis);CHKERRQ(ierr); 4159 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4160 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4161 } else { 4162 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4163 } 4164 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4165 oldpin = pcbddc->local_mat->boundtocpu; 4166 #endif 4167 ierr = MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);CHKERRQ(ierr); 4168 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4169 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4170 ierr = PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);CHKERRQ(ierr); 4171 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4172 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4173 } 4174 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4175 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4176 ierr = MatBindToCPU(pcbddc->local_mat,oldpin);CHKERRQ(ierr); 4177 #endif 4178 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4179 } 4180 4181 /* Matrix of coarse basis functions (local) */ 4182 if (pcbddc->coarse_phi_B) { 4183 PetscInt on_B,on_primal,on_D=n_D; 4184 if (pcbddc->coarse_phi_D) { 4185 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4186 } 4187 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4188 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4189 PetscScalar *marray; 4190 4191 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4192 ierr = PetscFree(marray);CHKERRQ(ierr); 4193 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4194 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4195 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4196 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4197 } 4198 } 4199 4200 if (!pcbddc->coarse_phi_B) { 4201 PetscScalar *marr; 4202 4203 /* memory size */ 4204 n = n_B*pcbddc->local_primal_size; 4205 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4206 if (!pcbddc->symmetric_primal) n *= 2; 4207 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4208 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4209 marr += n_B*pcbddc->local_primal_size; 4210 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4211 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4212 marr += n_D*pcbddc->local_primal_size; 4213 } 4214 if (!pcbddc->symmetric_primal) { 4215 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4216 marr += n_B*pcbddc->local_primal_size; 4217 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4218 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4219 } 4220 } else { 4221 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4222 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4223 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4224 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4225 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4226 } 4227 } 4228 } 4229 4230 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4231 p0_lidx_I = NULL; 4232 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4233 const PetscInt *idxs; 4234 4235 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4236 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4237 for (i=0;i<pcbddc->benign_n;i++) { 4238 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4239 } 4240 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4241 } 4242 4243 /* vertices */ 4244 if (n_vertices) { 4245 PetscBool restoreavr = PETSC_FALSE; 4246 4247 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4248 4249 if (n_R) { 4250 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4251 PetscBLASInt B_N,B_one = 1; 4252 const PetscScalar *x; 4253 PetscScalar *y; 4254 4255 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4256 if (need_benign_correction) { 4257 ISLocalToGlobalMapping RtoN; 4258 IS is_p0; 4259 PetscInt *idxs_p0,n; 4260 4261 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4262 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4263 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4264 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4265 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4266 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4267 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4268 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4269 } 4270 4271 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4272 if (!sparserhs || need_benign_correction) { 4273 if (lda_rhs == n_R) { 4274 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4275 } else { 4276 PetscScalar *av,*array; 4277 const PetscInt *xadj,*adjncy; 4278 PetscInt n; 4279 PetscBool flg_row; 4280 4281 array = work+lda_rhs*n_vertices; 4282 ierr = PetscArrayzero(array,lda_rhs*n_vertices);CHKERRQ(ierr); 4283 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4284 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4285 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4286 for (i=0;i<n;i++) { 4287 PetscInt j; 4288 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4289 } 4290 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4291 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4292 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4293 } 4294 if (need_benign_correction) { 4295 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4296 PetscScalar *marr; 4297 4298 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4299 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4300 4301 | 0 0 0 | (V) 4302 L = | 0 0 -1 | (P-p0) 4303 | 0 0 -1 | (p0) 4304 4305 */ 4306 for (i=0;i<reuse_solver->benign_n;i++) { 4307 const PetscScalar *vals; 4308 const PetscInt *idxs,*idxs_zero; 4309 PetscInt n,j,nz; 4310 4311 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4312 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4313 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4314 for (j=0;j<n;j++) { 4315 PetscScalar val = vals[j]; 4316 PetscInt k,col = idxs[j]; 4317 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4318 } 4319 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4320 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4321 } 4322 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4323 } 4324 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4325 Brhs = A_RV; 4326 } else { 4327 Mat tA_RVT,A_RVT; 4328 4329 if (!pcbddc->symmetric_primal) { 4330 /* A_RV already scaled by -1 */ 4331 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4332 } else { 4333 restoreavr = PETSC_TRUE; 4334 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4335 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4336 A_RVT = A_VR; 4337 } 4338 if (lda_rhs != n_R) { 4339 PetscScalar *aa; 4340 PetscInt r,*ii,*jj; 4341 PetscBool done; 4342 4343 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4344 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4345 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4346 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4347 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4348 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4349 } else { 4350 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4351 tA_RVT = A_RVT; 4352 } 4353 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4354 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4355 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4356 } 4357 if (F) { 4358 /* need to correct the rhs */ 4359 if (need_benign_correction) { 4360 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4361 PetscScalar *marr; 4362 4363 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4364 if (lda_rhs != n_R) { 4365 for (i=0;i<n_vertices;i++) { 4366 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4367 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4368 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4369 } 4370 } else { 4371 for (i=0;i<n_vertices;i++) { 4372 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4373 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4374 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4375 } 4376 } 4377 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4378 } 4379 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4380 if (restoreavr) { 4381 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4382 } 4383 /* need to correct the solution */ 4384 if (need_benign_correction) { 4385 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4386 PetscScalar *marr; 4387 4388 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4389 if (lda_rhs != n_R) { 4390 for (i=0;i<n_vertices;i++) { 4391 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4392 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4393 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4394 } 4395 } else { 4396 for (i=0;i<n_vertices;i++) { 4397 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4398 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4399 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4400 } 4401 } 4402 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4403 } 4404 } else { 4405 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4406 for (i=0;i<n_vertices;i++) { 4407 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4408 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4409 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4410 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4411 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4412 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4413 } 4414 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4415 } 4416 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4417 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4418 /* S_VV and S_CV */ 4419 if (n_constraints) { 4420 Mat B; 4421 4422 ierr = PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);CHKERRQ(ierr); 4423 for (i=0;i<n_vertices;i++) { 4424 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4425 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4426 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4427 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4428 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4429 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4430 } 4431 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4432 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4433 ierr = MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV);CHKERRQ(ierr); 4434 ierr = MatProductSetType(S_CV,MATPRODUCT_AB);CHKERRQ(ierr); 4435 ierr = MatProductSetFromOptions(S_CV);CHKERRQ(ierr); 4436 ierr = MatProductSymbolic(S_CV);CHKERRQ(ierr); 4437 ierr = MatProductNumeric(S_CV);CHKERRQ(ierr); 4438 ierr = MatProductClear(S_CV);CHKERRQ(ierr); 4439 4440 ierr = MatDestroy(&B);CHKERRQ(ierr); 4441 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4442 /* Reuse B = local_auxmat2_R * S_CV */ 4443 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B);CHKERRQ(ierr); 4444 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4445 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4446 ierr = MatProductSymbolic(B);CHKERRQ(ierr); 4447 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4448 4449 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4450 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4451 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4452 ierr = MatDestroy(&B);CHKERRQ(ierr); 4453 } 4454 if (lda_rhs != n_R) { 4455 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4456 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4457 ierr = MatDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4458 } 4459 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4460 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4461 if (need_benign_correction) { 4462 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4463 PetscScalar *marr,*sums; 4464 4465 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4466 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4467 for (i=0;i<reuse_solver->benign_n;i++) { 4468 const PetscScalar *vals; 4469 const PetscInt *idxs,*idxs_zero; 4470 PetscInt n,j,nz; 4471 4472 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4473 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4474 for (j=0;j<n_vertices;j++) { 4475 PetscInt k; 4476 sums[j] = 0.; 4477 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4478 } 4479 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4480 for (j=0;j<n;j++) { 4481 PetscScalar val = vals[j]; 4482 PetscInt k; 4483 for (k=0;k<n_vertices;k++) { 4484 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4485 } 4486 } 4487 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4488 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4489 } 4490 ierr = PetscFree(sums);CHKERRQ(ierr); 4491 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4492 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4493 } 4494 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4495 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4496 ierr = MatDenseGetArrayRead(A_VV,&x);CHKERRQ(ierr); 4497 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4498 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4499 ierr = MatDenseRestoreArrayRead(A_VV,&x);CHKERRQ(ierr); 4500 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4501 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4502 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4503 } else { 4504 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4505 } 4506 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4507 4508 /* coarse basis functions */ 4509 for (i=0;i<n_vertices;i++) { 4510 PetscScalar *y; 4511 4512 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4513 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4514 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4515 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4516 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4517 y[n_B*i+idx_V_B[i]] = 1.0; 4518 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4519 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4520 4521 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4522 PetscInt j; 4523 4524 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4525 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4526 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4527 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4528 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4529 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4530 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4531 } 4532 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4533 } 4534 /* if n_R == 0 the object is not destroyed */ 4535 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4536 } 4537 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4538 4539 if (n_constraints) { 4540 Mat B; 4541 4542 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4543 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4544 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B);CHKERRQ(ierr); 4545 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4546 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4547 ierr = MatProductSymbolic(B);CHKERRQ(ierr); 4548 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4549 4550 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4551 if (n_vertices) { 4552 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4553 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4554 } else { 4555 Mat S_VCt; 4556 4557 if (lda_rhs != n_R) { 4558 ierr = MatDestroy(&B);CHKERRQ(ierr); 4559 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4560 ierr = MatDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4561 } 4562 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4563 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4564 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4565 } 4566 } 4567 ierr = MatDestroy(&B);CHKERRQ(ierr); 4568 /* coarse basis functions */ 4569 for (i=0;i<n_constraints;i++) { 4570 PetscScalar *y; 4571 4572 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4573 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4574 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4575 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4576 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4577 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4578 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4579 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4580 PetscInt j; 4581 4582 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4583 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4584 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4585 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4586 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4587 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4588 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4589 } 4590 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4591 } 4592 } 4593 if (n_constraints) { 4594 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4595 } 4596 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4597 4598 /* coarse matrix entries relative to B_0 */ 4599 if (pcbddc->benign_n) { 4600 Mat B0_B,B0_BPHI; 4601 IS is_dummy; 4602 const PetscScalar *data; 4603 PetscInt j; 4604 4605 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4606 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4607 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4608 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4609 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4610 ierr = MatDenseGetArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4611 for (j=0;j<pcbddc->benign_n;j++) { 4612 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4613 for (i=0;i<pcbddc->local_primal_size;i++) { 4614 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4615 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4616 } 4617 } 4618 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4619 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4620 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4621 } 4622 4623 /* compute other basis functions for non-symmetric problems */ 4624 if (!pcbddc->symmetric_primal) { 4625 Mat B_V=NULL,B_C=NULL; 4626 PetscScalar *marray; 4627 4628 if (n_constraints) { 4629 Mat S_CCT,C_CRT; 4630 4631 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4632 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4633 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4634 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4635 if (n_vertices) { 4636 Mat S_VCT; 4637 4638 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4639 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4640 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4641 } 4642 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4643 } else { 4644 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4645 } 4646 if (n_vertices && n_R) { 4647 PetscScalar *av,*marray; 4648 const PetscInt *xadj,*adjncy; 4649 PetscInt n; 4650 PetscBool flg_row; 4651 4652 /* B_V = B_V - A_VR^T */ 4653 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4654 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4655 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4656 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4657 for (i=0;i<n;i++) { 4658 PetscInt j; 4659 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4660 } 4661 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4662 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4663 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4664 } 4665 4666 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4667 if (n_vertices) { 4668 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4669 for (i=0;i<n_vertices;i++) { 4670 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4671 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4672 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4673 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4674 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4675 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4676 } 4677 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4678 } 4679 if (B_C) { 4680 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4681 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4682 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4683 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4684 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4685 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4686 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4687 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4688 } 4689 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4690 } 4691 /* coarse basis functions */ 4692 for (i=0;i<pcbddc->local_primal_size;i++) { 4693 PetscScalar *y; 4694 4695 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4696 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4697 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4698 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4699 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4700 if (i<n_vertices) { 4701 y[n_B*i+idx_V_B[i]] = 1.0; 4702 } 4703 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4704 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4705 4706 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4707 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4708 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4709 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4710 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4711 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4712 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4713 } 4714 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4715 } 4716 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4717 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4718 } 4719 4720 /* free memory */ 4721 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4722 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4723 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4724 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4725 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4726 ierr = PetscFree(work);CHKERRQ(ierr); 4727 if (n_vertices) { 4728 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4729 } 4730 if (n_constraints) { 4731 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4732 } 4733 ierr = PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 4734 4735 /* Checking coarse_sub_mat and coarse basis functios */ 4736 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4737 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4738 if (pcbddc->dbg_flag) { 4739 Mat coarse_sub_mat; 4740 Mat AUXMAT,TM1,TM2,TM3,TM4; 4741 Mat coarse_phi_D,coarse_phi_B; 4742 Mat coarse_psi_D,coarse_psi_B; 4743 Mat A_II,A_BB,A_IB,A_BI; 4744 Mat C_B,CPHI; 4745 IS is_dummy; 4746 Vec mones; 4747 MatType checkmattype=MATSEQAIJ; 4748 PetscReal real_value; 4749 4750 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4751 Mat A; 4752 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4753 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4754 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4755 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4756 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4757 ierr = MatDestroy(&A);CHKERRQ(ierr); 4758 } else { 4759 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4760 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4761 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4762 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4763 } 4764 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4765 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4766 if (!pcbddc->symmetric_primal) { 4767 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4768 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4769 } 4770 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4771 4772 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4773 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4774 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4775 if (!pcbddc->symmetric_primal) { 4776 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4777 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4778 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4779 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4780 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4781 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4782 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4783 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4784 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4785 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4786 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4787 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4788 } else { 4789 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4790 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4791 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4792 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4793 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4794 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4795 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4796 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4797 } 4798 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4799 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4800 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4801 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4802 if (pcbddc->benign_n) { 4803 Mat B0_B,B0_BPHI; 4804 const PetscScalar *data2; 4805 PetscScalar *data; 4806 PetscInt j; 4807 4808 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4809 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4810 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4811 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4812 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4813 ierr = MatDenseGetArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4814 for (j=0;j<pcbddc->benign_n;j++) { 4815 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4816 for (i=0;i<pcbddc->local_primal_size;i++) { 4817 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4818 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4819 } 4820 } 4821 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4822 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4823 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4824 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4825 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4826 } 4827 #if 0 4828 { 4829 PetscViewer viewer; 4830 char filename[256]; 4831 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4832 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4833 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4834 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4835 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4836 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4837 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4838 if (pcbddc->coarse_phi_B) { 4839 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4840 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4841 } 4842 if (pcbddc->coarse_phi_D) { 4843 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4844 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4845 } 4846 if (pcbddc->coarse_psi_B) { 4847 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4848 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4849 } 4850 if (pcbddc->coarse_psi_D) { 4851 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4852 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4853 } 4854 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4855 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4856 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4857 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4858 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4859 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4860 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4861 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4862 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4863 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4864 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4865 } 4866 #endif 4867 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4868 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4869 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4870 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4871 4872 /* check constraints */ 4873 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4874 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4875 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4876 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4877 } else { 4878 PetscScalar *data; 4879 Mat tmat; 4880 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4881 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4882 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4883 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4884 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4885 } 4886 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4887 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4888 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4889 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4890 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4891 if (!pcbddc->symmetric_primal) { 4892 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4893 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4894 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4895 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4896 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4897 } 4898 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4899 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4900 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4901 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4902 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4903 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4904 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4905 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4906 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4907 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4908 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4909 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4910 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4911 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4912 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4913 if (!pcbddc->symmetric_primal) { 4914 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4915 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4916 } 4917 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4918 } 4919 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4920 { 4921 PetscBool gpu; 4922 4923 ierr = PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);CHKERRQ(ierr); 4924 if (gpu) { 4925 if (pcbddc->local_auxmat1) { 4926 ierr = MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4927 } 4928 if (pcbddc->local_auxmat2) { 4929 ierr = MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4930 } 4931 if (pcbddc->coarse_phi_B) { 4932 ierr = MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4933 } 4934 if (pcbddc->coarse_phi_D) { 4935 ierr = MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4936 } 4937 if (pcbddc->coarse_psi_B) { 4938 ierr = MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4939 } 4940 if (pcbddc->coarse_psi_D) { 4941 ierr = MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4942 } 4943 } 4944 } 4945 /* get back data */ 4946 *coarse_submat_vals_n = coarse_submat_vals; 4947 PetscFunctionReturn(0); 4948 } 4949 4950 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4951 { 4952 Mat *work_mat; 4953 IS isrow_s,iscol_s; 4954 PetscBool rsorted,csorted; 4955 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4956 PetscErrorCode ierr; 4957 4958 PetscFunctionBegin; 4959 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4960 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4961 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4962 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4963 4964 if (!rsorted) { 4965 const PetscInt *idxs; 4966 PetscInt *idxs_sorted,i; 4967 4968 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4969 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4970 for (i=0;i<rsize;i++) { 4971 idxs_perm_r[i] = i; 4972 } 4973 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4974 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4975 for (i=0;i<rsize;i++) { 4976 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4977 } 4978 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4979 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4980 } else { 4981 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4982 isrow_s = isrow; 4983 } 4984 4985 if (!csorted) { 4986 if (isrow == iscol) { 4987 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4988 iscol_s = isrow_s; 4989 } else { 4990 const PetscInt *idxs; 4991 PetscInt *idxs_sorted,i; 4992 4993 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4994 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4995 for (i=0;i<csize;i++) { 4996 idxs_perm_c[i] = i; 4997 } 4998 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4999 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 5000 for (i=0;i<csize;i++) { 5001 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5002 } 5003 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 5004 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 5005 } 5006 } else { 5007 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 5008 iscol_s = iscol; 5009 } 5010 5011 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5012 5013 if (!rsorted || !csorted) { 5014 Mat new_mat; 5015 IS is_perm_r,is_perm_c; 5016 5017 if (!rsorted) { 5018 PetscInt *idxs_r,i; 5019 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 5020 for (i=0;i<rsize;i++) { 5021 idxs_r[idxs_perm_r[i]] = i; 5022 } 5023 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 5024 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 5025 } else { 5026 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 5027 } 5028 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 5029 5030 if (!csorted) { 5031 if (isrow_s == iscol_s) { 5032 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 5033 is_perm_c = is_perm_r; 5034 } else { 5035 PetscInt *idxs_c,i; 5036 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5037 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 5038 for (i=0;i<csize;i++) { 5039 idxs_c[idxs_perm_c[i]] = i; 5040 } 5041 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 5042 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 5043 } 5044 } else { 5045 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 5046 } 5047 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 5048 5049 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 5050 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 5051 work_mat[0] = new_mat; 5052 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 5053 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 5054 } 5055 5056 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 5057 *B = work_mat[0]; 5058 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 5059 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 5060 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 5061 PetscFunctionReturn(0); 5062 } 5063 5064 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5065 { 5066 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5067 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5068 Mat new_mat,lA; 5069 IS is_local,is_global; 5070 PetscInt local_size; 5071 PetscBool isseqaij; 5072 PetscErrorCode ierr; 5073 5074 PetscFunctionBegin; 5075 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5076 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 5077 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 5078 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 5079 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 5080 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 5081 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5082 5083 if (pcbddc->dbg_flag) { 5084 Vec x,x_change; 5085 PetscReal error; 5086 5087 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 5088 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5089 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 5090 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5091 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5092 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 5093 if (!pcbddc->change_interior) { 5094 const PetscScalar *x,*y,*v; 5095 PetscReal lerror = 0.; 5096 PetscInt i; 5097 5098 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 5099 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 5100 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 5101 for (i=0;i<local_size;i++) 5102 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5103 lerror = PetscAbsScalar(x[i]-y[i]); 5104 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 5105 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 5106 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 5107 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 5108 if (error > PETSC_SMALL) { 5109 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5110 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 5111 } else { 5112 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 5113 } 5114 } 5115 } 5116 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5117 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5118 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5119 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5120 if (error > PETSC_SMALL) { 5121 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5122 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 5123 } else { 5124 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 5125 } 5126 } 5127 ierr = VecDestroy(&x);CHKERRQ(ierr); 5128 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5129 } 5130 5131 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5132 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 5133 5134 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5135 ierr = PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5136 if (isseqaij) { 5137 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5138 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5139 if (lA) { 5140 Mat work; 5141 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5142 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5143 ierr = MatDestroy(&work);CHKERRQ(ierr); 5144 } 5145 } else { 5146 Mat work_mat; 5147 5148 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5149 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5150 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5151 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 5152 if (lA) { 5153 Mat work; 5154 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5155 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5156 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5157 ierr = MatDestroy(&work);CHKERRQ(ierr); 5158 } 5159 } 5160 if (matis->A->symmetric_set) { 5161 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 5162 #if !defined(PETSC_USE_COMPLEX) 5163 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 5164 #endif 5165 } 5166 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 5167 PetscFunctionReturn(0); 5168 } 5169 5170 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5171 { 5172 PC_IS* pcis = (PC_IS*)(pc->data); 5173 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5174 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5175 PetscInt *idx_R_local=NULL; 5176 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5177 PetscInt vbs,bs; 5178 PetscBT bitmask=NULL; 5179 PetscErrorCode ierr; 5180 5181 PetscFunctionBegin; 5182 /* 5183 No need to setup local scatters if 5184 - primal space is unchanged 5185 AND 5186 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5187 AND 5188 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5189 */ 5190 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5191 PetscFunctionReturn(0); 5192 } 5193 /* destroy old objects */ 5194 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5195 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5196 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5197 /* Set Non-overlapping dimensions */ 5198 n_B = pcis->n_B; 5199 n_D = pcis->n - n_B; 5200 n_vertices = pcbddc->n_vertices; 5201 5202 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5203 5204 /* create auxiliary bitmask and allocate workspace */ 5205 if (!sub_schurs || !sub_schurs->reuse_solver) { 5206 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5207 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5208 for (i=0;i<n_vertices;i++) { 5209 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5210 } 5211 5212 for (i=0, n_R=0; i<pcis->n; i++) { 5213 if (!PetscBTLookup(bitmask,i)) { 5214 idx_R_local[n_R++] = i; 5215 } 5216 } 5217 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5218 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5219 5220 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5221 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5222 } 5223 5224 /* Block code */ 5225 vbs = 1; 5226 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5227 if (bs>1 && !(n_vertices%bs)) { 5228 PetscBool is_blocked = PETSC_TRUE; 5229 PetscInt *vary; 5230 if (!sub_schurs || !sub_schurs->reuse_solver) { 5231 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5232 ierr = PetscArrayzero(vary,pcis->n/bs);CHKERRQ(ierr); 5233 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5234 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5235 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5236 for (i=0; i<pcis->n/bs; i++) { 5237 if (vary[i]!=0 && vary[i]!=bs) { 5238 is_blocked = PETSC_FALSE; 5239 break; 5240 } 5241 } 5242 ierr = PetscFree(vary);CHKERRQ(ierr); 5243 } else { 5244 /* Verify directly the R set */ 5245 for (i=0; i<n_R/bs; i++) { 5246 PetscInt j,node=idx_R_local[bs*i]; 5247 for (j=1; j<bs; j++) { 5248 if (node != idx_R_local[bs*i+j]-j) { 5249 is_blocked = PETSC_FALSE; 5250 break; 5251 } 5252 } 5253 } 5254 } 5255 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5256 vbs = bs; 5257 for (i=0;i<n_R/vbs;i++) { 5258 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5259 } 5260 } 5261 } 5262 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5263 if (sub_schurs && sub_schurs->reuse_solver) { 5264 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5265 5266 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5267 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5268 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5269 reuse_solver->is_R = pcbddc->is_R_local; 5270 } else { 5271 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5272 } 5273 5274 /* print some info if requested */ 5275 if (pcbddc->dbg_flag) { 5276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5277 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5278 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5279 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5282 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5283 } 5284 5285 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5286 if (!sub_schurs || !sub_schurs->reuse_solver) { 5287 IS is_aux1,is_aux2; 5288 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5289 5290 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5291 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5292 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5293 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5294 for (i=0; i<n_D; i++) { 5295 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5296 } 5297 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5298 for (i=0, j=0; i<n_R; i++) { 5299 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5300 aux_array1[j++] = i; 5301 } 5302 } 5303 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5304 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5305 for (i=0, j=0; i<n_B; i++) { 5306 if (!PetscBTLookup(bitmask,is_indices[i])) { 5307 aux_array2[j++] = i; 5308 } 5309 } 5310 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5311 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5312 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5313 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5314 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5315 5316 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5317 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5318 for (i=0, j=0; i<n_R; i++) { 5319 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5320 aux_array1[j++] = i; 5321 } 5322 } 5323 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5324 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5325 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5326 } 5327 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5328 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5329 } else { 5330 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5331 IS tis; 5332 PetscInt schur_size; 5333 5334 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5335 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5336 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5337 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5338 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5339 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5340 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5341 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5342 } 5343 } 5344 PetscFunctionReturn(0); 5345 } 5346 5347 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5348 { 5349 MatNullSpace NullSpace; 5350 Mat dmat; 5351 const Vec *nullvecs; 5352 Vec v,v2,*nullvecs2; 5353 VecScatter sct = NULL; 5354 PetscContainer c; 5355 PetscScalar *ddata; 5356 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5357 PetscBool nnsp_has_cnst; 5358 PetscErrorCode ierr; 5359 5360 PetscFunctionBegin; 5361 if (!is && !B) { /* MATIS */ 5362 Mat_IS* matis = (Mat_IS*)A->data; 5363 5364 if (!B) { 5365 ierr = MatISGetLocalMat(A,&B);CHKERRQ(ierr); 5366 } 5367 sct = matis->cctx; 5368 ierr = PetscObjectReference((PetscObject)sct);CHKERRQ(ierr); 5369 } else { 5370 ierr = MatGetNullSpace(B,&NullSpace);CHKERRQ(ierr); 5371 if (!NullSpace) { 5372 ierr = MatGetNearNullSpace(B,&NullSpace);CHKERRQ(ierr); 5373 } 5374 if (NullSpace) PetscFunctionReturn(0); 5375 } 5376 ierr = MatGetNullSpace(A,&NullSpace);CHKERRQ(ierr); 5377 if (!NullSpace) { 5378 ierr = MatGetNearNullSpace(A,&NullSpace);CHKERRQ(ierr); 5379 } 5380 if (!NullSpace) PetscFunctionReturn(0); 5381 5382 ierr = MatCreateVecs(A,&v,NULL);CHKERRQ(ierr); 5383 ierr = MatCreateVecs(B,&v2,NULL);CHKERRQ(ierr); 5384 if (!sct) { 5385 ierr = VecScatterCreate(v,is,v2,NULL,&sct);CHKERRQ(ierr); 5386 } 5387 ierr = MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);CHKERRQ(ierr); 5388 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5389 ierr = PetscMalloc1(bsiz,&nullvecs2);CHKERRQ(ierr); 5390 ierr = VecGetBlockSize(v2,&bs);CHKERRQ(ierr); 5391 ierr = VecGetSize(v2,&N);CHKERRQ(ierr); 5392 ierr = VecGetLocalSize(v2,&n);CHKERRQ(ierr); 5393 ierr = PetscMalloc1(n*bsiz,&ddata);CHKERRQ(ierr); 5394 for (k=0;k<nnsp_size;k++) { 5395 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);CHKERRQ(ierr); 5396 ierr = VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5397 ierr = VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5398 } 5399 if (nnsp_has_cnst) { 5400 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);CHKERRQ(ierr); 5401 ierr = VecSet(nullvecs2[nnsp_size],1.0);CHKERRQ(ierr); 5402 } 5403 ierr = PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);CHKERRQ(ierr); 5404 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);CHKERRQ(ierr); 5405 5406 ierr = MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);CHKERRQ(ierr); 5407 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);CHKERRQ(ierr); 5408 ierr = PetscContainerSetPointer(c,ddata);CHKERRQ(ierr); 5409 ierr = PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);CHKERRQ(ierr); 5410 ierr = PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);CHKERRQ(ierr); 5411 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 5412 ierr = PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);CHKERRQ(ierr); 5413 ierr = MatDestroy(&dmat);CHKERRQ(ierr); 5414 5415 for (k=0;k<bsiz;k++) { 5416 ierr = VecDestroy(&nullvecs2[k]);CHKERRQ(ierr); 5417 } 5418 ierr = PetscFree(nullvecs2);CHKERRQ(ierr); 5419 ierr = MatSetNearNullSpace(B,NullSpace);CHKERRQ(ierr); 5420 ierr = MatNullSpaceDestroy(&NullSpace);CHKERRQ(ierr); 5421 ierr = VecDestroy(&v);CHKERRQ(ierr); 5422 ierr = VecDestroy(&v2);CHKERRQ(ierr); 5423 ierr = VecScatterDestroy(&sct);CHKERRQ(ierr); 5424 PetscFunctionReturn(0); 5425 } 5426 5427 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5428 { 5429 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5430 PC_IS *pcis = (PC_IS*)pc->data; 5431 PC pc_temp; 5432 Mat A_RR; 5433 MatNullSpace nnsp; 5434 MatReuse reuse; 5435 PetscScalar m_one = -1.0; 5436 PetscReal value; 5437 PetscInt n_D,n_R; 5438 PetscBool issbaij,opts; 5439 PetscErrorCode ierr; 5440 void (*f)(void) = NULL; 5441 char dir_prefix[256],neu_prefix[256],str_level[16]; 5442 size_t len; 5443 5444 PetscFunctionBegin; 5445 ierr = PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5446 /* approximate solver, propagate NearNullSpace if needed */ 5447 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5448 MatNullSpace gnnsp1,gnnsp2; 5449 PetscBool lhas,ghas; 5450 5451 ierr = MatGetNearNullSpace(pcbddc->local_mat,&nnsp);CHKERRQ(ierr); 5452 ierr = MatGetNearNullSpace(pc->pmat,&gnnsp1);CHKERRQ(ierr); 5453 ierr = MatGetNullSpace(pc->pmat,&gnnsp2);CHKERRQ(ierr); 5454 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5455 ierr = MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 5456 if (!ghas && (gnnsp1 || gnnsp2)) { 5457 ierr = MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);CHKERRQ(ierr); 5458 } 5459 } 5460 5461 /* compute prefixes */ 5462 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5463 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5464 if (!pcbddc->current_level) { 5465 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5466 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5467 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5468 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5469 } else { 5470 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5471 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5472 len -= 15; /* remove "pc_bddc_coarse_" */ 5473 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5474 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5475 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5476 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5477 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5478 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5479 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5480 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5481 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5482 } 5483 5484 /* DIRICHLET PROBLEM */ 5485 if (dirichlet) { 5486 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5487 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5488 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5489 if (pcbddc->dbg_flag) { 5490 Mat A_IIn; 5491 5492 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5493 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5494 pcis->A_II = A_IIn; 5495 } 5496 } 5497 if (pcbddc->local_mat->symmetric_set) { 5498 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5499 } 5500 /* Matrix for Dirichlet problem is pcis->A_II */ 5501 n_D = pcis->n - pcis->n_B; 5502 opts = PETSC_FALSE; 5503 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5504 opts = PETSC_TRUE; 5505 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5506 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5507 /* default */ 5508 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5509 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5510 ierr = PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5511 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5512 if (issbaij) { 5513 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5514 } else { 5515 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5516 } 5517 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr); 5518 } 5519 ierr = MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);CHKERRQ(ierr); 5520 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);CHKERRQ(ierr); 5521 /* Allow user's customization */ 5522 if (opts) { 5523 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5524 } 5525 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5526 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5527 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);CHKERRQ(ierr); 5528 } 5529 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5530 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5531 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5532 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5533 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5534 const PetscInt *idxs; 5535 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5536 5537 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5538 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5539 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5540 for (i=0;i<nl;i++) { 5541 for (d=0;d<cdim;d++) { 5542 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5543 } 5544 } 5545 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5546 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5547 ierr = PetscFree(scoords);CHKERRQ(ierr); 5548 } 5549 if (sub_schurs && sub_schurs->reuse_solver) { 5550 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5551 5552 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5553 } 5554 5555 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5556 if (!n_D) { 5557 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5558 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5559 } 5560 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5561 /* set ksp_D into pcis data */ 5562 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5563 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5564 pcis->ksp_D = pcbddc->ksp_D; 5565 } 5566 5567 /* NEUMANN PROBLEM */ 5568 A_RR = NULL; 5569 if (neumann) { 5570 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5571 PetscInt ibs,mbs; 5572 PetscBool issbaij, reuse_neumann_solver; 5573 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5574 5575 reuse_neumann_solver = PETSC_FALSE; 5576 if (sub_schurs && sub_schurs->reuse_solver) { 5577 IS iP; 5578 5579 reuse_neumann_solver = PETSC_TRUE; 5580 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5581 if (iP) reuse_neumann_solver = PETSC_FALSE; 5582 } 5583 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5584 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5585 if (pcbddc->ksp_R) { /* already created ksp */ 5586 PetscInt nn_R; 5587 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5588 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5589 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5590 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5591 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5592 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5593 reuse = MAT_INITIAL_MATRIX; 5594 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5595 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5596 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5597 reuse = MAT_INITIAL_MATRIX; 5598 } else { /* safe to reuse the matrix */ 5599 reuse = MAT_REUSE_MATRIX; 5600 } 5601 } 5602 /* last check */ 5603 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5604 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5605 reuse = MAT_INITIAL_MATRIX; 5606 } 5607 } else { /* first time, so we need to create the matrix */ 5608 reuse = MAT_INITIAL_MATRIX; 5609 } 5610 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5611 TODO: Get Rid of these conversions */ 5612 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5613 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5614 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5615 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5616 if (matis->A == pcbddc->local_mat) { 5617 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5618 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5619 } else { 5620 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5621 } 5622 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5623 if (matis->A == pcbddc->local_mat) { 5624 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5625 ierr = MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5626 } else { 5627 ierr = MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5628 } 5629 } 5630 /* extract A_RR */ 5631 if (reuse_neumann_solver) { 5632 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5633 5634 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5635 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5636 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5637 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5638 } else { 5639 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5640 } 5641 } else { 5642 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5643 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5644 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5645 } 5646 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5647 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5648 } 5649 if (pcbddc->local_mat->symmetric_set) { 5650 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5651 } 5652 opts = PETSC_FALSE; 5653 if (!pcbddc->ksp_R) { /* create object if not present */ 5654 opts = PETSC_TRUE; 5655 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5656 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5657 /* default */ 5658 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5659 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5660 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5661 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5662 if (issbaij) { 5663 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5664 } else { 5665 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5666 } 5667 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr); 5668 } 5669 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5670 ierr = MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);CHKERRQ(ierr); 5671 if (opts) { /* Allow user's customization once */ 5672 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5673 } 5674 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5675 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5676 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);CHKERRQ(ierr); 5677 } 5678 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5679 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5680 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5681 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5682 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5683 const PetscInt *idxs; 5684 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5685 5686 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5687 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5688 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5689 for (i=0;i<nl;i++) { 5690 for (d=0;d<cdim;d++) { 5691 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5692 } 5693 } 5694 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5695 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5696 ierr = PetscFree(scoords);CHKERRQ(ierr); 5697 } 5698 5699 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5700 if (!n_R) { 5701 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5702 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5703 } 5704 /* Reuse solver if it is present */ 5705 if (reuse_neumann_solver) { 5706 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5707 5708 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5709 } 5710 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5711 } 5712 5713 if (pcbddc->dbg_flag) { 5714 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5715 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5716 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5717 } 5718 ierr = PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5719 5720 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5721 if (pcbddc->NullSpace_corr[0]) { 5722 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5723 } 5724 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5725 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5726 } 5727 if (neumann && pcbddc->NullSpace_corr[2]) { 5728 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5729 } 5730 /* check Dirichlet and Neumann solvers */ 5731 if (pcbddc->dbg_flag) { 5732 if (dirichlet) { /* Dirichlet */ 5733 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5734 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5735 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5736 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 5737 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5738 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5739 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5740 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5741 } 5742 if (neumann) { /* Neumann */ 5743 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5744 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5745 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5746 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 5747 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5748 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5749 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5750 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5751 } 5752 } 5753 /* free Neumann problem's matrix */ 5754 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5755 PetscFunctionReturn(0); 5756 } 5757 5758 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5759 { 5760 PetscErrorCode ierr; 5761 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5762 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5763 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5764 5765 PetscFunctionBegin; 5766 if (!reuse_solver) { 5767 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5768 } 5769 if (!pcbddc->switch_static) { 5770 if (applytranspose && pcbddc->local_auxmat1) { 5771 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5772 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5773 } 5774 if (!reuse_solver) { 5775 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5776 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5777 } else { 5778 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5779 5780 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5781 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5782 } 5783 } else { 5784 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5785 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5786 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5787 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5788 if (applytranspose && pcbddc->local_auxmat1) { 5789 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5790 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5791 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5792 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5793 } 5794 } 5795 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);CHKERRQ(ierr); 5796 if (!reuse_solver || pcbddc->switch_static) { 5797 if (applytranspose) { 5798 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5799 } else { 5800 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5801 } 5802 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);CHKERRQ(ierr); 5803 } else { 5804 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5805 5806 if (applytranspose) { 5807 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5808 } else { 5809 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5810 } 5811 } 5812 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);CHKERRQ(ierr); 5813 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5814 if (!pcbddc->switch_static) { 5815 if (!reuse_solver) { 5816 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5817 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5818 } else { 5819 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5820 5821 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5822 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5823 } 5824 if (!applytranspose && pcbddc->local_auxmat1) { 5825 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5826 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5827 } 5828 } else { 5829 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5830 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5831 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5832 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5833 if (!applytranspose && pcbddc->local_auxmat1) { 5834 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5835 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5836 } 5837 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5838 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5839 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5840 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5841 } 5842 PetscFunctionReturn(0); 5843 } 5844 5845 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5846 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5847 { 5848 PetscErrorCode ierr; 5849 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5850 PC_IS* pcis = (PC_IS*) (pc->data); 5851 const PetscScalar zero = 0.0; 5852 5853 PetscFunctionBegin; 5854 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5855 if (!pcbddc->benign_apply_coarse_only) { 5856 if (applytranspose) { 5857 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5858 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5859 } else { 5860 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5861 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5862 } 5863 } else { 5864 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5865 } 5866 5867 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5868 if (pcbddc->benign_n) { 5869 PetscScalar *array; 5870 PetscInt j; 5871 5872 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5873 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5874 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5875 } 5876 5877 /* start communications from local primal nodes to rhs of coarse solver */ 5878 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5879 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5880 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5881 5882 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5883 if (pcbddc->coarse_ksp) { 5884 Mat coarse_mat; 5885 Vec rhs,sol; 5886 MatNullSpace nullsp; 5887 PetscBool isbddc = PETSC_FALSE; 5888 5889 if (pcbddc->benign_have_null) { 5890 PC coarse_pc; 5891 5892 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5893 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5894 /* we need to propagate to coarser levels the need for a possible benign correction */ 5895 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5896 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5897 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5898 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5899 } 5900 } 5901 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5902 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5903 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5904 if (applytranspose) { 5905 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5906 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5907 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5908 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5909 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5910 ierr = MatGetTransposeNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5911 if (nullsp) { 5912 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5913 } 5914 } else { 5915 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5916 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5917 PC coarse_pc; 5918 5919 if (nullsp) { 5920 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5921 } 5922 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5923 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5924 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5925 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5926 } else { 5927 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5928 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5929 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5930 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5931 if (nullsp) { 5932 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5933 } 5934 } 5935 } 5936 /* we don't need the benign correction at coarser levels anymore */ 5937 if (pcbddc->benign_have_null && isbddc) { 5938 PC coarse_pc; 5939 PC_BDDC* coarsepcbddc; 5940 5941 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5942 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5943 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5944 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5945 } 5946 } 5947 5948 /* Local solution on R nodes */ 5949 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5950 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5951 } 5952 /* communications from coarse sol to local primal nodes */ 5953 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5954 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5955 5956 /* Sum contributions from the two levels */ 5957 if (!pcbddc->benign_apply_coarse_only) { 5958 if (applytranspose) { 5959 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5960 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5961 } else { 5962 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5963 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5964 } 5965 /* store p0 */ 5966 if (pcbddc->benign_n) { 5967 PetscScalar *array; 5968 PetscInt j; 5969 5970 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5971 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5972 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5973 } 5974 } else { /* expand the coarse solution */ 5975 if (applytranspose) { 5976 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5977 } else { 5978 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5979 } 5980 } 5981 PetscFunctionReturn(0); 5982 } 5983 5984 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5985 { 5986 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5987 Vec from,to; 5988 const PetscScalar *array; 5989 PetscErrorCode ierr; 5990 5991 PetscFunctionBegin; 5992 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5993 from = pcbddc->coarse_vec; 5994 to = pcbddc->vec1_P; 5995 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5996 Vec tvec; 5997 5998 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5999 ierr = VecResetArray(tvec);CHKERRQ(ierr); 6000 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 6001 ierr = VecGetArrayRead(tvec,&array);CHKERRQ(ierr); 6002 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 6003 ierr = VecRestoreArrayRead(tvec,&array);CHKERRQ(ierr); 6004 } 6005 } else { /* from local to global -> put data in coarse right hand side */ 6006 from = pcbddc->vec1_P; 6007 to = pcbddc->coarse_vec; 6008 } 6009 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 6010 PetscFunctionReturn(0); 6011 } 6012 6013 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6014 { 6015 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 6016 Vec from,to; 6017 const PetscScalar *array; 6018 PetscErrorCode ierr; 6019 6020 PetscFunctionBegin; 6021 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6022 from = pcbddc->coarse_vec; 6023 to = pcbddc->vec1_P; 6024 } else { /* from local to global -> put data in coarse right hand side */ 6025 from = pcbddc->vec1_P; 6026 to = pcbddc->coarse_vec; 6027 } 6028 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 6029 if (smode == SCATTER_FORWARD) { 6030 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6031 Vec tvec; 6032 6033 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 6034 ierr = VecGetArrayRead(to,&array);CHKERRQ(ierr); 6035 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 6036 ierr = VecRestoreArrayRead(to,&array);CHKERRQ(ierr); 6037 } 6038 } else { 6039 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6040 ierr = VecResetArray(from);CHKERRQ(ierr); 6041 } 6042 } 6043 PetscFunctionReturn(0); 6044 } 6045 6046 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6047 { 6048 PetscErrorCode ierr; 6049 PC_IS* pcis = (PC_IS*)(pc->data); 6050 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6051 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6052 /* one and zero */ 6053 PetscScalar one=1.0,zero=0.0; 6054 /* space to store constraints and their local indices */ 6055 PetscScalar *constraints_data; 6056 PetscInt *constraints_idxs,*constraints_idxs_B; 6057 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6058 PetscInt *constraints_n; 6059 /* iterators */ 6060 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6061 /* BLAS integers */ 6062 PetscBLASInt lwork,lierr; 6063 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6064 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6065 /* reuse */ 6066 PetscInt olocal_primal_size,olocal_primal_size_cc; 6067 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6068 /* change of basis */ 6069 PetscBool qr_needed; 6070 PetscBT change_basis,qr_needed_idx; 6071 /* auxiliary stuff */ 6072 PetscInt *nnz,*is_indices; 6073 PetscInt ncc; 6074 /* some quantities */ 6075 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6076 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6077 PetscReal tol; /* tolerance for retaining eigenmodes */ 6078 6079 PetscFunctionBegin; 6080 tol = PetscSqrtReal(PETSC_SMALL); 6081 /* Destroy Mat objects computed previously */ 6082 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6083 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6084 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 6085 /* save info on constraints from previous setup (if any) */ 6086 olocal_primal_size = pcbddc->local_primal_size; 6087 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6088 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 6089 ierr = PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);CHKERRQ(ierr); 6090 ierr = PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);CHKERRQ(ierr); 6091 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6092 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6093 6094 if (!pcbddc->adaptive_selection) { 6095 IS ISForVertices,*ISForFaces,*ISForEdges; 6096 MatNullSpace nearnullsp; 6097 const Vec *nearnullvecs; 6098 Vec *localnearnullsp; 6099 PetscScalar *array; 6100 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 6101 PetscBool nnsp_has_cnst; 6102 /* LAPACK working arrays for SVD or POD */ 6103 PetscBool skip_lapack,boolforchange; 6104 PetscScalar *work; 6105 PetscReal *singular_vals; 6106 #if defined(PETSC_USE_COMPLEX) 6107 PetscReal *rwork; 6108 #endif 6109 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6110 PetscBLASInt dummy_int=1; 6111 PetscScalar dummy_scalar=1.; 6112 PetscBool use_pod = PETSC_FALSE; 6113 6114 /* MKL SVD with same input gives different results on different processes! */ 6115 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL) 6116 use_pod = PETSC_TRUE; 6117 #endif 6118 /* Get index sets for faces, edges and vertices from graph */ 6119 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 6120 /* print some info */ 6121 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6122 PetscInt nv; 6123 6124 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6125 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 6126 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6127 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6128 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6129 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 6130 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 6131 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6132 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6133 } 6134 6135 /* free unneeded index sets */ 6136 if (!pcbddc->use_vertices) { 6137 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6138 } 6139 if (!pcbddc->use_edges) { 6140 for (i=0;i<n_ISForEdges;i++) { 6141 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6142 } 6143 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6144 n_ISForEdges = 0; 6145 } 6146 if (!pcbddc->use_faces) { 6147 for (i=0;i<n_ISForFaces;i++) { 6148 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6149 } 6150 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6151 n_ISForFaces = 0; 6152 } 6153 6154 /* check if near null space is attached to global mat */ 6155 if (pcbddc->use_nnsp) { 6156 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 6157 } else nearnullsp = NULL; 6158 6159 if (nearnullsp) { 6160 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 6161 /* remove any stored info */ 6162 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 6163 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6164 /* store information for BDDC solver reuse */ 6165 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 6166 pcbddc->onearnullspace = nearnullsp; 6167 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6168 for (i=0;i<nnsp_size;i++) { 6169 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 6170 } 6171 } else { /* if near null space is not provided BDDC uses constants by default */ 6172 nnsp_size = 0; 6173 nnsp_has_cnst = PETSC_TRUE; 6174 } 6175 /* get max number of constraints on a single cc */ 6176 max_constraints = nnsp_size; 6177 if (nnsp_has_cnst) max_constraints++; 6178 6179 /* 6180 Evaluate maximum storage size needed by the procedure 6181 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6182 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6183 There can be multiple constraints per connected component 6184 */ 6185 n_vertices = 0; 6186 if (ISForVertices) { 6187 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 6188 } 6189 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6190 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 6191 6192 total_counts = n_ISForFaces+n_ISForEdges; 6193 total_counts *= max_constraints; 6194 total_counts += n_vertices; 6195 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 6196 6197 total_counts = 0; 6198 max_size_of_constraint = 0; 6199 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6200 IS used_is; 6201 if (i<n_ISForEdges) { 6202 used_is = ISForEdges[i]; 6203 } else { 6204 used_is = ISForFaces[i-n_ISForEdges]; 6205 } 6206 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 6207 total_counts += j; 6208 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6209 } 6210 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 6211 6212 /* get local part of global near null space vectors */ 6213 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 6214 for (k=0;k<nnsp_size;k++) { 6215 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 6216 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6217 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6218 } 6219 6220 /* whether or not to skip lapack calls */ 6221 skip_lapack = PETSC_TRUE; 6222 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6223 6224 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6225 if (!skip_lapack) { 6226 PetscScalar temp_work; 6227 6228 if (use_pod) { 6229 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6230 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 6231 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 6232 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 6233 #if defined(PETSC_USE_COMPLEX) 6234 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 6235 #endif 6236 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6237 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6238 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 6239 lwork = -1; 6240 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6241 #if !defined(PETSC_USE_COMPLEX) 6242 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6243 #else 6244 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6245 #endif 6246 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6247 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6248 } else { 6249 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6250 /* SVD */ 6251 PetscInt max_n,min_n; 6252 max_n = max_size_of_constraint; 6253 min_n = max_constraints; 6254 if (max_size_of_constraint < max_constraints) { 6255 min_n = max_size_of_constraint; 6256 max_n = max_constraints; 6257 } 6258 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 6259 #if defined(PETSC_USE_COMPLEX) 6260 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 6261 #endif 6262 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6263 lwork = -1; 6264 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 6265 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 6266 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 6267 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6268 #if !defined(PETSC_USE_COMPLEX) 6269 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6270 #else 6271 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6272 #endif 6273 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6274 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6275 #else 6276 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6277 #endif /* on missing GESVD */ 6278 } 6279 /* Allocate optimal workspace */ 6280 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 6281 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 6282 } 6283 /* Now we can loop on constraining sets */ 6284 total_counts = 0; 6285 constraints_idxs_ptr[0] = 0; 6286 constraints_data_ptr[0] = 0; 6287 /* vertices */ 6288 if (n_vertices) { 6289 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6290 ierr = PetscArraycpy(constraints_idxs,is_indices,n_vertices);CHKERRQ(ierr); 6291 for (i=0;i<n_vertices;i++) { 6292 constraints_n[total_counts] = 1; 6293 constraints_data[total_counts] = 1.0; 6294 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6295 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6296 total_counts++; 6297 } 6298 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6299 n_vertices = total_counts; 6300 } 6301 6302 /* edges and faces */ 6303 total_counts_cc = total_counts; 6304 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6305 IS used_is; 6306 PetscBool idxs_copied = PETSC_FALSE; 6307 6308 if (ncc<n_ISForEdges) { 6309 used_is = ISForEdges[ncc]; 6310 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6311 } else { 6312 used_is = ISForFaces[ncc-n_ISForEdges]; 6313 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6314 } 6315 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6316 6317 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6318 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6319 /* change of basis should not be performed on local periodic nodes */ 6320 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6321 if (nnsp_has_cnst) { 6322 PetscScalar quad_value; 6323 6324 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6325 idxs_copied = PETSC_TRUE; 6326 6327 if (!pcbddc->use_nnsp_true) { 6328 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6329 } else { 6330 quad_value = 1.0; 6331 } 6332 for (j=0;j<size_of_constraint;j++) { 6333 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6334 } 6335 temp_constraints++; 6336 total_counts++; 6337 } 6338 for (k=0;k<nnsp_size;k++) { 6339 PetscReal real_value; 6340 PetscScalar *ptr_to_data; 6341 6342 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6343 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6344 for (j=0;j<size_of_constraint;j++) { 6345 ptr_to_data[j] = array[is_indices[j]]; 6346 } 6347 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6348 /* check if array is null on the connected component */ 6349 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6350 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6351 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6352 temp_constraints++; 6353 total_counts++; 6354 if (!idxs_copied) { 6355 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6356 idxs_copied = PETSC_TRUE; 6357 } 6358 } 6359 } 6360 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6361 valid_constraints = temp_constraints; 6362 if (!pcbddc->use_nnsp_true && temp_constraints) { 6363 if (temp_constraints == 1) { /* just normalize the constraint */ 6364 PetscScalar norm,*ptr_to_data; 6365 6366 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6367 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6368 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6369 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6370 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6371 } else { /* perform SVD */ 6372 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6373 6374 if (use_pod) { 6375 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6376 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6377 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6378 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6379 from that computed using LAPACKgesvd 6380 -> This is due to a different computation of eigenvectors in LAPACKheev 6381 -> The quality of the POD-computed basis will be the same */ 6382 ierr = PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);CHKERRQ(ierr); 6383 /* Store upper triangular part of correlation matrix */ 6384 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6385 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6386 for (j=0;j<temp_constraints;j++) { 6387 for (k=0;k<j+1;k++) { 6388 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6389 } 6390 } 6391 /* compute eigenvalues and eigenvectors of correlation matrix */ 6392 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6393 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6394 #if !defined(PETSC_USE_COMPLEX) 6395 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6396 #else 6397 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6398 #endif 6399 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6400 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6401 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6402 j = 0; 6403 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6404 total_counts = total_counts-j; 6405 valid_constraints = temp_constraints-j; 6406 /* scale and copy POD basis into used quadrature memory */ 6407 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6408 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6409 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6410 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6411 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6412 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6413 if (j<temp_constraints) { 6414 PetscInt ii; 6415 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6416 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6417 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6418 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6419 for (k=0;k<temp_constraints-j;k++) { 6420 for (ii=0;ii<size_of_constraint;ii++) { 6421 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6422 } 6423 } 6424 } 6425 } else { 6426 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6427 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6428 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6429 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6430 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6431 #if !defined(PETSC_USE_COMPLEX) 6432 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6433 #else 6434 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6435 #endif 6436 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6437 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6438 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6439 k = temp_constraints; 6440 if (k > size_of_constraint) k = size_of_constraint; 6441 j = 0; 6442 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6443 valid_constraints = k-j; 6444 total_counts = total_counts-temp_constraints+valid_constraints; 6445 #else 6446 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6447 #endif /* on missing GESVD */ 6448 } 6449 } 6450 } 6451 /* update pointers information */ 6452 if (valid_constraints) { 6453 constraints_n[total_counts_cc] = valid_constraints; 6454 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6455 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6456 /* set change_of_basis flag */ 6457 if (boolforchange) { 6458 PetscBTSet(change_basis,total_counts_cc); 6459 } 6460 total_counts_cc++; 6461 } 6462 } 6463 /* free workspace */ 6464 if (!skip_lapack) { 6465 ierr = PetscFree(work);CHKERRQ(ierr); 6466 #if defined(PETSC_USE_COMPLEX) 6467 ierr = PetscFree(rwork);CHKERRQ(ierr); 6468 #endif 6469 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6470 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6471 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6472 } 6473 for (k=0;k<nnsp_size;k++) { 6474 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6475 } 6476 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6477 /* free index sets of faces, edges and vertices */ 6478 for (i=0;i<n_ISForFaces;i++) { 6479 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6480 } 6481 if (n_ISForFaces) { 6482 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6483 } 6484 for (i=0;i<n_ISForEdges;i++) { 6485 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6486 } 6487 if (n_ISForEdges) { 6488 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6489 } 6490 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6491 } else { 6492 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6493 6494 total_counts = 0; 6495 n_vertices = 0; 6496 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6497 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6498 } 6499 max_constraints = 0; 6500 total_counts_cc = 0; 6501 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6502 total_counts += pcbddc->adaptive_constraints_n[i]; 6503 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6504 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6505 } 6506 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6507 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6508 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6509 constraints_data = pcbddc->adaptive_constraints_data; 6510 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6511 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6512 total_counts_cc = 0; 6513 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6514 if (pcbddc->adaptive_constraints_n[i]) { 6515 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6516 } 6517 } 6518 6519 max_size_of_constraint = 0; 6520 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6521 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6522 /* Change of basis */ 6523 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6524 if (pcbddc->use_change_of_basis) { 6525 for (i=0;i<sub_schurs->n_subs;i++) { 6526 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6527 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6528 } 6529 } 6530 } 6531 } 6532 pcbddc->local_primal_size = total_counts; 6533 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6534 6535 /* map constraints_idxs in boundary numbering */ 6536 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6537 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6538 6539 /* Create constraint matrix */ 6540 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6541 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6542 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6543 6544 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6545 /* determine if a QR strategy is needed for change of basis */ 6546 qr_needed = pcbddc->use_qr_single; 6547 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6548 total_primal_vertices=0; 6549 pcbddc->local_primal_size_cc = 0; 6550 for (i=0;i<total_counts_cc;i++) { 6551 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6552 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6553 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6554 pcbddc->local_primal_size_cc += 1; 6555 } else if (PetscBTLookup(change_basis,i)) { 6556 for (k=0;k<constraints_n[i];k++) { 6557 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6558 } 6559 pcbddc->local_primal_size_cc += constraints_n[i]; 6560 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6561 PetscBTSet(qr_needed_idx,i); 6562 qr_needed = PETSC_TRUE; 6563 } 6564 } else { 6565 pcbddc->local_primal_size_cc += 1; 6566 } 6567 } 6568 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6569 pcbddc->n_vertices = total_primal_vertices; 6570 /* permute indices in order to have a sorted set of vertices */ 6571 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6572 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6573 ierr = PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);CHKERRQ(ierr); 6574 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6575 6576 /* nonzero structure of constraint matrix */ 6577 /* and get reference dof for local constraints */ 6578 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6579 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6580 6581 j = total_primal_vertices; 6582 total_counts = total_primal_vertices; 6583 cum = total_primal_vertices; 6584 for (i=n_vertices;i<total_counts_cc;i++) { 6585 if (!PetscBTLookup(change_basis,i)) { 6586 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6587 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6588 cum++; 6589 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6590 for (k=0;k<constraints_n[i];k++) { 6591 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6592 nnz[j+k] = size_of_constraint; 6593 } 6594 j += constraints_n[i]; 6595 } 6596 } 6597 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6598 ierr = MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6599 ierr = PetscFree(nnz);CHKERRQ(ierr); 6600 6601 /* set values in constraint matrix */ 6602 for (i=0;i<total_primal_vertices;i++) { 6603 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6604 } 6605 total_counts = total_primal_vertices; 6606 for (i=n_vertices;i<total_counts_cc;i++) { 6607 if (!PetscBTLookup(change_basis,i)) { 6608 PetscInt *cols; 6609 6610 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6611 cols = constraints_idxs+constraints_idxs_ptr[i]; 6612 for (k=0;k<constraints_n[i];k++) { 6613 PetscInt row = total_counts+k; 6614 PetscScalar *vals; 6615 6616 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6617 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6618 } 6619 total_counts += constraints_n[i]; 6620 } 6621 } 6622 /* assembling */ 6623 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6624 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6625 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6626 6627 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6628 if (pcbddc->use_change_of_basis) { 6629 /* dual and primal dofs on a single cc */ 6630 PetscInt dual_dofs,primal_dofs; 6631 /* working stuff for GEQRF */ 6632 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6633 PetscBLASInt lqr_work; 6634 /* working stuff for UNGQR */ 6635 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6636 PetscBLASInt lgqr_work; 6637 /* working stuff for TRTRS */ 6638 PetscScalar *trs_rhs = NULL; 6639 PetscBLASInt Blas_NRHS; 6640 /* pointers for values insertion into change of basis matrix */ 6641 PetscInt *start_rows,*start_cols; 6642 PetscScalar *start_vals; 6643 /* working stuff for values insertion */ 6644 PetscBT is_primal; 6645 PetscInt *aux_primal_numbering_B; 6646 /* matrix sizes */ 6647 PetscInt global_size,local_size; 6648 /* temporary change of basis */ 6649 Mat localChangeOfBasisMatrix; 6650 /* extra space for debugging */ 6651 PetscScalar *dbg_work = NULL; 6652 6653 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6654 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6655 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6656 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6657 /* nonzeros for local mat */ 6658 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6659 if (!pcbddc->benign_change || pcbddc->fake_change) { 6660 for (i=0;i<pcis->n;i++) nnz[i]=1; 6661 } else { 6662 const PetscInt *ii; 6663 PetscInt n; 6664 PetscBool flg_row; 6665 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6666 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6667 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6668 } 6669 for (i=n_vertices;i<total_counts_cc;i++) { 6670 if (PetscBTLookup(change_basis,i)) { 6671 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6672 if (PetscBTLookup(qr_needed_idx,i)) { 6673 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6674 } else { 6675 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6676 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6677 } 6678 } 6679 } 6680 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6681 ierr = MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6682 ierr = PetscFree(nnz);CHKERRQ(ierr); 6683 /* Set interior change in the matrix */ 6684 if (!pcbddc->benign_change || pcbddc->fake_change) { 6685 for (i=0;i<pcis->n;i++) { 6686 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6687 } 6688 } else { 6689 const PetscInt *ii,*jj; 6690 PetscScalar *aa; 6691 PetscInt n; 6692 PetscBool flg_row; 6693 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6694 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6695 for (i=0;i<n;i++) { 6696 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6697 } 6698 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6699 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6700 } 6701 6702 if (pcbddc->dbg_flag) { 6703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6705 } 6706 6707 /* Now we loop on the constraints which need a change of basis */ 6708 /* 6709 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6710 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6711 6712 Basic blocks of change of basis matrix T computed by 6713 6714 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6715 6716 | 1 0 ... 0 s_1/S | 6717 | 0 1 ... 0 s_2/S | 6718 | ... | 6719 | 0 ... 1 s_{n-1}/S | 6720 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6721 6722 with S = \sum_{i=1}^n s_i^2 6723 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6724 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6725 6726 - QR decomposition of constraints otherwise 6727 */ 6728 if (qr_needed && max_size_of_constraint) { 6729 /* space to store Q */ 6730 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6731 /* array to store scaling factors for reflectors */ 6732 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6733 /* first we issue queries for optimal work */ 6734 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6735 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6736 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6737 lqr_work = -1; 6738 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6739 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6740 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6741 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6742 lgqr_work = -1; 6743 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6744 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6745 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6746 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6747 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6748 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6749 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6750 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6751 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6752 /* array to store rhs and solution of triangular solver */ 6753 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6754 /* allocating workspace for check */ 6755 if (pcbddc->dbg_flag) { 6756 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6757 } 6758 } 6759 /* array to store whether a node is primal or not */ 6760 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6761 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6762 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6763 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6764 for (i=0;i<total_primal_vertices;i++) { 6765 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6766 } 6767 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6768 6769 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6770 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6771 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6772 if (PetscBTLookup(change_basis,total_counts)) { 6773 /* get constraint info */ 6774 primal_dofs = constraints_n[total_counts]; 6775 dual_dofs = size_of_constraint-primal_dofs; 6776 6777 if (pcbddc->dbg_flag) { 6778 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6779 } 6780 6781 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6782 6783 /* copy quadrature constraints for change of basis check */ 6784 if (pcbddc->dbg_flag) { 6785 ierr = PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6786 } 6787 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6788 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6789 6790 /* compute QR decomposition of constraints */ 6791 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6792 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6793 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6794 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6795 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6796 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6797 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6798 6799 /* explictly compute R^-T */ 6800 ierr = PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);CHKERRQ(ierr); 6801 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6802 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6803 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6804 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6805 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6806 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6807 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6808 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6809 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6810 6811 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6812 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6813 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6814 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6815 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6816 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6817 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6818 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6819 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6820 6821 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6822 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6823 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6824 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6825 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6826 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6827 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6828 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6829 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6830 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6831 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6832 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6833 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6834 6835 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6836 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6837 /* insert cols for primal dofs */ 6838 for (j=0;j<primal_dofs;j++) { 6839 start_vals = &qr_basis[j*size_of_constraint]; 6840 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6841 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6842 } 6843 /* insert cols for dual dofs */ 6844 for (j=0,k=0;j<dual_dofs;k++) { 6845 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6846 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6847 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6848 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6849 j++; 6850 } 6851 } 6852 6853 /* check change of basis */ 6854 if (pcbddc->dbg_flag) { 6855 PetscInt ii,jj; 6856 PetscBool valid_qr=PETSC_TRUE; 6857 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6858 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6859 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6860 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6861 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6862 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6863 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6864 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6865 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6866 for (jj=0;jj<size_of_constraint;jj++) { 6867 for (ii=0;ii<primal_dofs;ii++) { 6868 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6869 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6870 } 6871 } 6872 if (!valid_qr) { 6873 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6874 for (jj=0;jj<size_of_constraint;jj++) { 6875 for (ii=0;ii<primal_dofs;ii++) { 6876 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6878 } 6879 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6880 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6881 } 6882 } 6883 } 6884 } else { 6885 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6886 } 6887 } 6888 } else { /* simple transformation block */ 6889 PetscInt row,col; 6890 PetscScalar val,norm; 6891 6892 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6893 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6894 for (j=0;j<size_of_constraint;j++) { 6895 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6896 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6897 if (!PetscBTLookup(is_primal,row_B)) { 6898 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6899 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6900 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6901 } else { 6902 for (k=0;k<size_of_constraint;k++) { 6903 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6904 if (row != col) { 6905 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6906 } else { 6907 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6908 } 6909 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6910 } 6911 } 6912 } 6913 if (pcbddc->dbg_flag) { 6914 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6915 } 6916 } 6917 } else { 6918 if (pcbddc->dbg_flag) { 6919 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6920 } 6921 } 6922 } 6923 6924 /* free workspace */ 6925 if (qr_needed) { 6926 if (pcbddc->dbg_flag) { 6927 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6928 } 6929 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6930 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6931 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6932 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6933 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6934 } 6935 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6936 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6937 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6938 6939 /* assembling of global change of variable */ 6940 if (!pcbddc->fake_change) { 6941 Mat tmat; 6942 PetscInt bs; 6943 6944 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6945 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6946 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6947 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6948 ierr = MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6949 ierr = MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6950 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6951 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6952 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6953 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6954 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6955 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6956 ierr = MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6957 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6958 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6959 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6960 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6961 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6962 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6963 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6964 6965 /* check */ 6966 if (pcbddc->dbg_flag) { 6967 PetscReal error; 6968 Vec x,x_change; 6969 6970 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6971 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6972 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6973 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6974 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6975 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6976 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6977 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6978 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6979 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6980 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6981 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6982 if (error > PETSC_SMALL) { 6983 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6984 } 6985 ierr = VecDestroy(&x);CHKERRQ(ierr); 6986 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6987 } 6988 /* adapt sub_schurs computed (if any) */ 6989 if (pcbddc->use_deluxe_scaling) { 6990 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6991 6992 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6993 if (sub_schurs && sub_schurs->S_Ej_all) { 6994 Mat S_new,tmat; 6995 IS is_all_N,is_V_Sall = NULL; 6996 6997 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6998 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6999 if (pcbddc->deluxe_zerorows) { 7000 ISLocalToGlobalMapping NtoSall; 7001 IS is_V; 7002 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 7003 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 7004 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 7005 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 7006 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 7007 } 7008 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 7009 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7010 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 7011 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7012 if (pcbddc->deluxe_zerorows) { 7013 const PetscScalar *array; 7014 const PetscInt *idxs_V,*idxs_all; 7015 PetscInt i,n_V; 7016 7017 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7018 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 7019 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7020 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7021 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 7022 for (i=0;i<n_V;i++) { 7023 PetscScalar val; 7024 PetscInt idx; 7025 7026 idx = idxs_V[i]; 7027 val = array[idxs_all[idxs_V[i]]]; 7028 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 7029 } 7030 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7031 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7032 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 7033 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7034 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7035 } 7036 sub_schurs->S_Ej_all = S_new; 7037 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7038 if (sub_schurs->sum_S_Ej_all) { 7039 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7040 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 7041 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7042 if (pcbddc->deluxe_zerorows) { 7043 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7044 } 7045 sub_schurs->sum_S_Ej_all = S_new; 7046 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7047 } 7048 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 7049 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 7050 } 7051 /* destroy any change of basis context in sub_schurs */ 7052 if (sub_schurs && sub_schurs->change) { 7053 PetscInt i; 7054 7055 for (i=0;i<sub_schurs->n_subs;i++) { 7056 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 7057 } 7058 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 7059 } 7060 } 7061 if (pcbddc->switch_static) { /* need to save the local change */ 7062 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7063 } else { 7064 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 7065 } 7066 /* determine if any process has changed the pressures locally */ 7067 pcbddc->change_interior = pcbddc->benign_have_null; 7068 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7069 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 7070 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7071 pcbddc->use_qr_single = qr_needed; 7072 } 7073 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7074 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7075 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 7076 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7077 } else { 7078 Mat benign_global = NULL; 7079 if (pcbddc->benign_have_null) { 7080 Mat M; 7081 7082 pcbddc->change_interior = PETSC_TRUE; 7083 ierr = VecCopy(matis->counter,pcis->vec1_N);CHKERRQ(ierr); 7084 ierr = VecReciprocal(pcis->vec1_N);CHKERRQ(ierr); 7085 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);CHKERRQ(ierr); 7086 if (pcbddc->benign_change) { 7087 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 7088 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 7089 } else { 7090 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);CHKERRQ(ierr); 7091 ierr = MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);CHKERRQ(ierr); 7092 } 7093 ierr = MatISSetLocalMat(benign_global,M);CHKERRQ(ierr); 7094 ierr = MatDestroy(&M);CHKERRQ(ierr); 7095 ierr = MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7096 ierr = MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7097 } 7098 if (pcbddc->user_ChangeOfBasisMatrix) { 7099 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 7100 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 7101 } else if (pcbddc->benign_have_null) { 7102 pcbddc->ChangeOfBasisMatrix = benign_global; 7103 } 7104 } 7105 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7106 IS is_global; 7107 const PetscInt *gidxs; 7108 7109 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7110 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 7111 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7112 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 7113 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 7114 } 7115 } 7116 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7117 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 7118 } 7119 7120 if (!pcbddc->fake_change) { 7121 /* add pressure dofs to set of primal nodes for numbering purposes */ 7122 for (i=0;i<pcbddc->benign_n;i++) { 7123 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7124 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7125 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7126 pcbddc->local_primal_size_cc++; 7127 pcbddc->local_primal_size++; 7128 } 7129 7130 /* check if a new primal space has been introduced (also take into account benign trick) */ 7131 pcbddc->new_primal_space_local = PETSC_TRUE; 7132 if (olocal_primal_size == pcbddc->local_primal_size) { 7133 ierr = PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7134 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7135 if (!pcbddc->new_primal_space_local) { 7136 ierr = PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7137 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7138 } 7139 } 7140 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7141 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 7142 } 7143 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 7144 7145 /* flush dbg viewer */ 7146 if (pcbddc->dbg_flag) { 7147 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7148 } 7149 7150 /* free workspace */ 7151 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 7152 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 7153 if (!pcbddc->adaptive_selection) { 7154 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 7155 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 7156 } else { 7157 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 7158 pcbddc->adaptive_constraints_idxs_ptr, 7159 pcbddc->adaptive_constraints_data_ptr, 7160 pcbddc->adaptive_constraints_idxs, 7161 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 7162 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 7163 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 7164 } 7165 PetscFunctionReturn(0); 7166 } 7167 7168 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7169 { 7170 ISLocalToGlobalMapping map; 7171 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7172 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7173 PetscInt i,N; 7174 PetscBool rcsr = PETSC_FALSE; 7175 PetscErrorCode ierr; 7176 7177 PetscFunctionBegin; 7178 if (pcbddc->recompute_topography) { 7179 pcbddc->graphanalyzed = PETSC_FALSE; 7180 /* Reset previously computed graph */ 7181 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 7182 /* Init local Graph struct */ 7183 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 7184 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 7185 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 7186 7187 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7188 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7189 } 7190 /* Check validity of the csr graph passed in by the user */ 7191 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7192 7193 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7194 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7195 PetscInt *xadj,*adjncy; 7196 PetscInt nvtxs; 7197 PetscBool flg_row=PETSC_FALSE; 7198 7199 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7200 if (flg_row) { 7201 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 7202 pcbddc->computed_rowadj = PETSC_TRUE; 7203 } 7204 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7205 rcsr = PETSC_TRUE; 7206 } 7207 if (pcbddc->dbg_flag) { 7208 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7209 } 7210 7211 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7212 PetscReal *lcoords; 7213 PetscInt n; 7214 MPI_Datatype dimrealtype; 7215 7216 /* TODO: support for blocked */ 7217 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7218 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7219 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 7220 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRMPI(ierr); 7221 ierr = MPI_Type_commit(&dimrealtype);CHKERRMPI(ierr); 7222 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);CHKERRQ(ierr); 7223 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);CHKERRQ(ierr); 7224 ierr = MPI_Type_free(&dimrealtype);CHKERRMPI(ierr); 7225 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 7226 7227 pcbddc->mat_graph->coords = lcoords; 7228 pcbddc->mat_graph->cloc = PETSC_TRUE; 7229 pcbddc->mat_graph->cnloc = n; 7230 } 7231 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7232 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7233 7234 /* Setup of Graph */ 7235 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7236 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7237 7238 /* attach info on disconnected subdomains if present */ 7239 if (pcbddc->n_local_subs) { 7240 PetscInt *local_subs,n,totn; 7241 7242 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7243 ierr = PetscMalloc1(n,&local_subs);CHKERRQ(ierr); 7244 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7245 for (i=0;i<pcbddc->n_local_subs;i++) { 7246 const PetscInt *idxs; 7247 PetscInt nl,j; 7248 7249 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 7250 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7251 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7252 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7253 } 7254 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7255 pcbddc->mat_graph->n_local_subs = totn + 1; 7256 pcbddc->mat_graph->local_subs = local_subs; 7257 } 7258 } 7259 7260 if (!pcbddc->graphanalyzed) { 7261 /* Graph's connected components analysis */ 7262 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 7263 pcbddc->graphanalyzed = PETSC_TRUE; 7264 pcbddc->corner_selected = pcbddc->corner_selection; 7265 } 7266 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7267 PetscFunctionReturn(0); 7268 } 7269 7270 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7271 { 7272 PetscInt i,j,n; 7273 PetscScalar *alphas; 7274 PetscReal norm,*onorms; 7275 PetscErrorCode ierr; 7276 7277 PetscFunctionBegin; 7278 n = *nio; 7279 if (!n) PetscFunctionReturn(0); 7280 ierr = PetscMalloc2(n,&alphas,n,&onorms);CHKERRQ(ierr); 7281 ierr = VecNormalize(vecs[0],&norm);CHKERRQ(ierr); 7282 if (norm < PETSC_SMALL) { 7283 onorms[0] = 0.0; 7284 ierr = VecSet(vecs[0],0.0);CHKERRQ(ierr); 7285 } else { 7286 onorms[0] = norm; 7287 } 7288 7289 for (i=1;i<n;i++) { 7290 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 7291 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7292 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 7293 ierr = VecNormalize(vecs[i],&norm);CHKERRQ(ierr); 7294 if (norm < PETSC_SMALL) { 7295 onorms[i] = 0.0; 7296 ierr = VecSet(vecs[i],0.0);CHKERRQ(ierr); 7297 } else { 7298 onorms[i] = norm; 7299 } 7300 } 7301 /* push nonzero vectors at the beginning */ 7302 for (i=0;i<n;i++) { 7303 if (onorms[i] == 0.0) { 7304 for (j=i+1;j<n;j++) { 7305 if (onorms[j] != 0.0) { 7306 ierr = VecCopy(vecs[j],vecs[i]);CHKERRQ(ierr); 7307 onorms[j] = 0.0; 7308 } 7309 } 7310 } 7311 } 7312 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7313 ierr = PetscFree2(alphas,onorms);CHKERRQ(ierr); 7314 PetscFunctionReturn(0); 7315 } 7316 7317 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7318 { 7319 Mat A; 7320 PetscInt n_neighs,*neighs,*n_shared,**shared; 7321 PetscMPIInt size,rank,color; 7322 PetscInt *xadj,*adjncy; 7323 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7324 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7325 PetscInt void_procs,*procs_candidates = NULL; 7326 PetscInt xadj_count,*count; 7327 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7328 PetscSubcomm psubcomm; 7329 MPI_Comm subcomm; 7330 PetscErrorCode ierr; 7331 7332 PetscFunctionBegin; 7333 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7334 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7335 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7336 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7337 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7338 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7339 7340 if (have_void) *have_void = PETSC_FALSE; 7341 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRMPI(ierr); 7342 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 7343 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7344 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7345 im_active = !!n; 7346 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 7347 void_procs = size - active_procs; 7348 /* get ranks of of non-active processes in mat communicator */ 7349 if (void_procs) { 7350 PetscInt ncand; 7351 7352 if (have_void) *have_void = PETSC_TRUE; 7353 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7354 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 7355 for (i=0,ncand=0;i<size;i++) { 7356 if (!procs_candidates[i]) { 7357 procs_candidates[ncand++] = i; 7358 } 7359 } 7360 /* force n_subdomains to be not greater that the number of non-active processes */ 7361 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7362 } 7363 7364 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7365 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7366 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7367 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7368 PetscInt issize,isidx,dest; 7369 if (*n_subdomains == 1) dest = 0; 7370 else dest = rank; 7371 if (im_active) { 7372 issize = 1; 7373 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7374 isidx = procs_candidates[dest]; 7375 } else { 7376 isidx = dest; 7377 } 7378 } else { 7379 issize = 0; 7380 isidx = -1; 7381 } 7382 if (*n_subdomains != 1) *n_subdomains = active_procs; 7383 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7384 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7385 PetscFunctionReturn(0); 7386 } 7387 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7388 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7389 threshold = PetscMax(threshold,2); 7390 7391 /* Get info on mapping */ 7392 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7393 7394 /* build local CSR graph of subdomains' connectivity */ 7395 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7396 xadj[0] = 0; 7397 xadj[1] = PetscMax(n_neighs-1,0); 7398 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7399 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7400 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7401 for (i=1;i<n_neighs;i++) 7402 for (j=0;j<n_shared[i];j++) 7403 count[shared[i][j]] += 1; 7404 7405 xadj_count = 0; 7406 for (i=1;i<n_neighs;i++) { 7407 for (j=0;j<n_shared[i];j++) { 7408 if (count[shared[i][j]] < threshold) { 7409 adjncy[xadj_count] = neighs[i]; 7410 adjncy_wgt[xadj_count] = n_shared[i]; 7411 xadj_count++; 7412 break; 7413 } 7414 } 7415 } 7416 xadj[1] = xadj_count; 7417 ierr = PetscFree(count);CHKERRQ(ierr); 7418 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7419 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7420 7421 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7422 7423 /* Restrict work on active processes only */ 7424 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7425 if (void_procs) { 7426 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7427 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7428 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7429 subcomm = PetscSubcommChild(psubcomm); 7430 } else { 7431 psubcomm = NULL; 7432 subcomm = PetscObjectComm((PetscObject)mat); 7433 } 7434 7435 v_wgt = NULL; 7436 if (!color) { 7437 ierr = PetscFree(xadj);CHKERRQ(ierr); 7438 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7439 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7440 } else { 7441 Mat subdomain_adj; 7442 IS new_ranks,new_ranks_contig; 7443 MatPartitioning partitioner; 7444 PetscInt rstart=0,rend=0; 7445 PetscInt *is_indices,*oldranks; 7446 PetscMPIInt size; 7447 PetscBool aggregate; 7448 7449 ierr = MPI_Comm_size(subcomm,&size);CHKERRMPI(ierr); 7450 if (void_procs) { 7451 PetscInt prank = rank; 7452 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7453 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRMPI(ierr); 7454 for (i=0;i<xadj[1];i++) { 7455 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7456 } 7457 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7458 } else { 7459 oldranks = NULL; 7460 } 7461 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7462 if (aggregate) { /* TODO: all this part could be made more efficient */ 7463 PetscInt lrows,row,ncols,*cols; 7464 PetscMPIInt nrank; 7465 PetscScalar *vals; 7466 7467 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRMPI(ierr); 7468 lrows = 0; 7469 if (nrank<redprocs) { 7470 lrows = size/redprocs; 7471 if (nrank<size%redprocs) lrows++; 7472 } 7473 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7474 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7475 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7476 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7477 row = nrank; 7478 ncols = xadj[1]-xadj[0]; 7479 cols = adjncy; 7480 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7481 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7482 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7483 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7484 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7485 ierr = PetscFree(xadj);CHKERRQ(ierr); 7486 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7487 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7488 ierr = PetscFree(vals);CHKERRQ(ierr); 7489 if (use_vwgt) { 7490 Vec v; 7491 const PetscScalar *array; 7492 PetscInt nl; 7493 7494 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7495 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7496 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7497 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7498 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7499 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7500 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7501 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7502 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7503 ierr = VecDestroy(&v);CHKERRQ(ierr); 7504 } 7505 } else { 7506 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7507 if (use_vwgt) { 7508 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7509 v_wgt[0] = n; 7510 } 7511 } 7512 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7513 7514 /* Partition */ 7515 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7516 #if defined(PETSC_HAVE_PTSCOTCH) 7517 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);CHKERRQ(ierr); 7518 #elif defined(PETSC_HAVE_PARMETIS) 7519 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);CHKERRQ(ierr); 7520 #else 7521 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);CHKERRQ(ierr); 7522 #endif 7523 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7524 if (v_wgt) { 7525 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7526 } 7527 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7528 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7529 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7530 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7531 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7532 7533 /* renumber new_ranks to avoid "holes" in new set of processors */ 7534 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7535 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7536 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7537 if (!aggregate) { 7538 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7539 if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7540 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7541 } else if (oldranks) { 7542 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7543 } else { 7544 ranks_send_to_idx[0] = is_indices[0]; 7545 } 7546 } else { 7547 PetscInt idx = 0; 7548 PetscMPIInt tag; 7549 MPI_Request *reqs; 7550 7551 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7552 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7553 for (i=rstart;i<rend;i++) { 7554 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRMPI(ierr); 7555 } 7556 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRMPI(ierr); 7557 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 7558 ierr = PetscFree(reqs);CHKERRQ(ierr); 7559 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7560 if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7561 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7562 } else if (oldranks) { 7563 ranks_send_to_idx[0] = oldranks[idx]; 7564 } else { 7565 ranks_send_to_idx[0] = idx; 7566 } 7567 } 7568 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7569 /* clean up */ 7570 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7571 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7572 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7573 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7574 } 7575 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7576 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7577 7578 /* assemble parallel IS for sends */ 7579 i = 1; 7580 if (!color) i=0; 7581 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7582 PetscFunctionReturn(0); 7583 } 7584 7585 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7586 7587 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7588 { 7589 Mat local_mat; 7590 IS is_sends_internal; 7591 PetscInt rows,cols,new_local_rows; 7592 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7593 PetscBool ismatis,isdense,newisdense,destroy_mat; 7594 ISLocalToGlobalMapping l2gmap; 7595 PetscInt* l2gmap_indices; 7596 const PetscInt* is_indices; 7597 MatType new_local_type; 7598 /* buffers */ 7599 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7600 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7601 PetscInt *recv_buffer_idxs_local; 7602 PetscScalar *ptr_vals,*recv_buffer_vals; 7603 const PetscScalar *send_buffer_vals; 7604 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7605 /* MPI */ 7606 MPI_Comm comm,comm_n; 7607 PetscSubcomm subcomm; 7608 PetscMPIInt n_sends,n_recvs,size; 7609 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7610 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7611 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7612 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7613 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7614 PetscErrorCode ierr; 7615 7616 PetscFunctionBegin; 7617 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7618 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7619 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7620 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7621 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7622 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7623 PetscValidLogicalCollectiveBool(mat,reuse,6); 7624 PetscValidLogicalCollectiveInt(mat,nis,8); 7625 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7626 if (nvecs) { 7627 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7628 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7629 } 7630 /* further checks */ 7631 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7632 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7633 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7634 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7635 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7636 if (reuse && *mat_n) { 7637 PetscInt mrows,mcols,mnrows,mncols; 7638 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7639 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7640 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7641 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7642 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7643 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7644 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7645 } 7646 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7647 PetscValidLogicalCollectiveInt(mat,bs,1); 7648 7649 /* prepare IS for sending if not provided */ 7650 if (!is_sends) { 7651 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7652 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7653 } else { 7654 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7655 is_sends_internal = is_sends; 7656 } 7657 7658 /* get comm */ 7659 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7660 7661 /* compute number of sends */ 7662 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7663 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7664 7665 /* compute number of receives */ 7666 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 7667 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 7668 ierr = PetscArrayzero(iflags,size);CHKERRQ(ierr); 7669 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7670 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7671 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7672 ierr = PetscFree(iflags);CHKERRQ(ierr); 7673 7674 /* restrict comm if requested */ 7675 subcomm = NULL; 7676 destroy_mat = PETSC_FALSE; 7677 if (restrict_comm) { 7678 PetscMPIInt color,subcommsize; 7679 7680 color = 0; 7681 if (restrict_full) { 7682 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7683 } else { 7684 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7685 } 7686 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr); 7687 subcommsize = size - subcommsize; 7688 /* check if reuse has been requested */ 7689 if (reuse) { 7690 if (*mat_n) { 7691 PetscMPIInt subcommsize2; 7692 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRMPI(ierr); 7693 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7694 comm_n = PetscObjectComm((PetscObject)*mat_n); 7695 } else { 7696 comm_n = PETSC_COMM_SELF; 7697 } 7698 } else { /* MAT_INITIAL_MATRIX */ 7699 PetscMPIInt rank; 7700 7701 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 7702 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7703 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7704 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7705 comm_n = PetscSubcommChild(subcomm); 7706 } 7707 /* flag to destroy *mat_n if not significative */ 7708 if (color) destroy_mat = PETSC_TRUE; 7709 } else { 7710 comm_n = comm; 7711 } 7712 7713 /* prepare send/receive buffers */ 7714 ierr = PetscMalloc1(size,&ilengths_idxs);CHKERRQ(ierr); 7715 ierr = PetscArrayzero(ilengths_idxs,size);CHKERRQ(ierr); 7716 ierr = PetscMalloc1(size,&ilengths_vals);CHKERRQ(ierr); 7717 ierr = PetscArrayzero(ilengths_vals,size);CHKERRQ(ierr); 7718 if (nis) { 7719 ierr = PetscCalloc1(size,&ilengths_idxs_is);CHKERRQ(ierr); 7720 } 7721 7722 /* Get data from local matrices */ 7723 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7724 /* TODO: See below some guidelines on how to prepare the local buffers */ 7725 /* 7726 send_buffer_vals should contain the raw values of the local matrix 7727 send_buffer_idxs should contain: 7728 - MatType_PRIVATE type 7729 - PetscInt size_of_l2gmap 7730 - PetscInt global_row_indices[size_of_l2gmap] 7731 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7732 */ 7733 else { 7734 ierr = MatDenseGetArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7735 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7736 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7737 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7738 send_buffer_idxs[1] = i; 7739 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7740 ierr = PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);CHKERRQ(ierr); 7741 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7742 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7743 for (i=0;i<n_sends;i++) { 7744 ilengths_vals[is_indices[i]] = len*len; 7745 ilengths_idxs[is_indices[i]] = len+2; 7746 } 7747 } 7748 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7749 /* additional is (if any) */ 7750 if (nis) { 7751 PetscMPIInt psum; 7752 PetscInt j; 7753 for (j=0,psum=0;j<nis;j++) { 7754 PetscInt plen; 7755 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7756 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7757 psum += len+1; /* indices + lenght */ 7758 } 7759 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7760 for (j=0,psum=0;j<nis;j++) { 7761 PetscInt plen; 7762 const PetscInt *is_array_idxs; 7763 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7764 send_buffer_idxs_is[psum] = plen; 7765 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7766 ierr = PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);CHKERRQ(ierr); 7767 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7768 psum += plen+1; /* indices + lenght */ 7769 } 7770 for (i=0;i<n_sends;i++) { 7771 ilengths_idxs_is[is_indices[i]] = psum; 7772 } 7773 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7774 } 7775 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7776 7777 buf_size_idxs = 0; 7778 buf_size_vals = 0; 7779 buf_size_idxs_is = 0; 7780 buf_size_vecs = 0; 7781 for (i=0;i<n_recvs;i++) { 7782 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7783 buf_size_vals += (PetscInt)olengths_vals[i]; 7784 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7785 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7786 } 7787 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7788 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7789 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7790 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7791 7792 /* get new tags for clean communications */ 7793 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7794 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7795 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7796 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7797 7798 /* allocate for requests */ 7799 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7800 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7801 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7802 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7803 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7804 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7805 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7806 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7807 7808 /* communications */ 7809 ptr_idxs = recv_buffer_idxs; 7810 ptr_vals = recv_buffer_vals; 7811 ptr_idxs_is = recv_buffer_idxs_is; 7812 ptr_vecs = recv_buffer_vecs; 7813 for (i=0;i<n_recvs;i++) { 7814 source_dest = onodes[i]; 7815 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRMPI(ierr); 7816 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRMPI(ierr); 7817 ptr_idxs += olengths_idxs[i]; 7818 ptr_vals += olengths_vals[i]; 7819 if (nis) { 7820 source_dest = onodes_is[i]; 7821 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRMPI(ierr); 7822 ptr_idxs_is += olengths_idxs_is[i]; 7823 } 7824 if (nvecs) { 7825 source_dest = onodes[i]; 7826 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRMPI(ierr); 7827 ptr_vecs += olengths_idxs[i]-2; 7828 } 7829 } 7830 for (i=0;i<n_sends;i++) { 7831 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7832 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRMPI(ierr); 7833 ierr = MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRMPI(ierr); 7834 if (nis) { 7835 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRMPI(ierr); 7836 } 7837 if (nvecs) { 7838 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7839 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRMPI(ierr); 7840 } 7841 } 7842 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7843 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7844 7845 /* assemble new l2g map */ 7846 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 7847 ptr_idxs = recv_buffer_idxs; 7848 new_local_rows = 0; 7849 for (i=0;i<n_recvs;i++) { 7850 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7851 ptr_idxs += olengths_idxs[i]; 7852 } 7853 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7854 ptr_idxs = recv_buffer_idxs; 7855 new_local_rows = 0; 7856 for (i=0;i<n_recvs;i++) { 7857 ierr = PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));CHKERRQ(ierr); 7858 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7859 ptr_idxs += olengths_idxs[i]; 7860 } 7861 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7862 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7863 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7864 7865 /* infer new local matrix type from received local matrices type */ 7866 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7867 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7868 if (n_recvs) { 7869 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7870 ptr_idxs = recv_buffer_idxs; 7871 for (i=0;i<n_recvs;i++) { 7872 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7873 new_local_type_private = MATAIJ_PRIVATE; 7874 break; 7875 } 7876 ptr_idxs += olengths_idxs[i]; 7877 } 7878 switch (new_local_type_private) { 7879 case MATDENSE_PRIVATE: 7880 new_local_type = MATSEQAIJ; 7881 bs = 1; 7882 break; 7883 case MATAIJ_PRIVATE: 7884 new_local_type = MATSEQAIJ; 7885 bs = 1; 7886 break; 7887 case MATBAIJ_PRIVATE: 7888 new_local_type = MATSEQBAIJ; 7889 break; 7890 case MATSBAIJ_PRIVATE: 7891 new_local_type = MATSEQSBAIJ; 7892 break; 7893 default: 7894 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7895 } 7896 } else { /* by default, new_local_type is seqaij */ 7897 new_local_type = MATSEQAIJ; 7898 bs = 1; 7899 } 7900 7901 /* create MATIS object if needed */ 7902 if (!reuse) { 7903 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7904 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7905 } else { 7906 /* it also destroys the local matrices */ 7907 if (*mat_n) { 7908 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7909 } else { /* this is a fake object */ 7910 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7911 } 7912 } 7913 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7914 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7915 7916 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 7917 7918 /* Global to local map of received indices */ 7919 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7920 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7921 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7922 7923 /* restore attributes -> type of incoming data and its size */ 7924 buf_size_idxs = 0; 7925 for (i=0;i<n_recvs;i++) { 7926 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7927 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7928 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7929 } 7930 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7931 7932 /* set preallocation */ 7933 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7934 if (!newisdense) { 7935 PetscInt *new_local_nnz=NULL; 7936 7937 ptr_idxs = recv_buffer_idxs_local; 7938 if (n_recvs) { 7939 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7940 } 7941 for (i=0;i<n_recvs;i++) { 7942 PetscInt j; 7943 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7944 for (j=0;j<*(ptr_idxs+1);j++) { 7945 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7946 } 7947 } else { 7948 /* TODO */ 7949 } 7950 ptr_idxs += olengths_idxs[i]; 7951 } 7952 if (new_local_nnz) { 7953 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7954 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7955 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7956 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7957 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7958 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7959 } else { 7960 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7961 } 7962 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7963 } else { 7964 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7965 } 7966 7967 /* set values */ 7968 ptr_vals = recv_buffer_vals; 7969 ptr_idxs = recv_buffer_idxs_local; 7970 for (i=0;i<n_recvs;i++) { 7971 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7972 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7973 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7974 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7975 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7976 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7977 } else { 7978 /* TODO */ 7979 } 7980 ptr_idxs += olengths_idxs[i]; 7981 ptr_vals += olengths_vals[i]; 7982 } 7983 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7984 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7985 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7986 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7987 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7988 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7989 7990 #if 0 7991 if (!restrict_comm) { /* check */ 7992 Vec lvec,rvec; 7993 PetscReal infty_error; 7994 7995 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7996 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7997 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7998 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7999 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 8000 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8001 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);CHKERRQ(ierr); 8002 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 8003 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 8004 } 8005 #endif 8006 8007 /* assemble new additional is (if any) */ 8008 if (nis) { 8009 PetscInt **temp_idxs,*count_is,j,psum; 8010 8011 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8012 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 8013 ptr_idxs = recv_buffer_idxs_is; 8014 psum = 0; 8015 for (i=0;i<n_recvs;i++) { 8016 for (j=0;j<nis;j++) { 8017 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8018 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8019 psum += plen; 8020 ptr_idxs += plen+1; /* shift pointer to received data */ 8021 } 8022 } 8023 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 8024 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 8025 for (i=1;i<nis;i++) { 8026 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 8027 } 8028 ierr = PetscArrayzero(count_is,nis);CHKERRQ(ierr); 8029 ptr_idxs = recv_buffer_idxs_is; 8030 for (i=0;i<n_recvs;i++) { 8031 for (j=0;j<nis;j++) { 8032 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8033 ierr = PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);CHKERRQ(ierr); 8034 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8035 ptr_idxs += plen+1; /* shift pointer to received data */ 8036 } 8037 } 8038 for (i=0;i<nis;i++) { 8039 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8040 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr); 8041 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8042 } 8043 ierr = PetscFree(count_is);CHKERRQ(ierr); 8044 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 8045 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 8046 } 8047 /* free workspace */ 8048 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 8049 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8050 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 8051 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8052 if (isdense) { 8053 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 8054 ierr = MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 8055 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 8056 } else { 8057 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 8058 } 8059 if (nis) { 8060 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8061 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 8062 } 8063 8064 if (nvecs) { 8065 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8066 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8067 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8068 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8069 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 8070 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 8071 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 8072 /* set values */ 8073 ptr_vals = recv_buffer_vecs; 8074 ptr_idxs = recv_buffer_idxs_local; 8075 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8076 for (i=0;i<n_recvs;i++) { 8077 PetscInt j; 8078 for (j=0;j<*(ptr_idxs+1);j++) { 8079 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8080 } 8081 ptr_idxs += olengths_idxs[i]; 8082 ptr_vals += olengths_idxs[i]-2; 8083 } 8084 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8085 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 8086 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 8087 } 8088 8089 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 8090 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 8091 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 8092 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 8093 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 8094 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 8095 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 8096 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 8097 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 8098 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 8099 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 8100 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 8101 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 8102 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 8103 ierr = PetscFree(onodes);CHKERRQ(ierr); 8104 if (nis) { 8105 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 8106 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 8107 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 8108 } 8109 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 8110 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 8111 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 8112 for (i=0;i<nis;i++) { 8113 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8114 } 8115 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8116 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8117 } 8118 *mat_n = NULL; 8119 } 8120 PetscFunctionReturn(0); 8121 } 8122 8123 /* temporary hack into ksp private data structure */ 8124 #include <petsc/private/kspimpl.h> 8125 8126 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8127 { 8128 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8129 PC_IS *pcis = (PC_IS*)pc->data; 8130 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8131 Mat coarsedivudotp = NULL; 8132 Mat coarseG,t_coarse_mat_is; 8133 MatNullSpace CoarseNullSpace = NULL; 8134 ISLocalToGlobalMapping coarse_islg; 8135 IS coarse_is,*isarray,corners; 8136 PetscInt i,im_active=-1,active_procs=-1; 8137 PetscInt nis,nisdofs,nisneu,nisvert; 8138 PetscInt coarse_eqs_per_proc; 8139 PC pc_temp; 8140 PCType coarse_pc_type; 8141 KSPType coarse_ksp_type; 8142 PetscBool multilevel_requested,multilevel_allowed; 8143 PetscBool coarse_reuse; 8144 PetscInt ncoarse,nedcfield; 8145 PetscBool compute_vecs = PETSC_FALSE; 8146 PetscScalar *array; 8147 MatReuse coarse_mat_reuse; 8148 PetscBool restr, full_restr, have_void; 8149 PetscMPIInt size; 8150 PetscErrorCode ierr; 8151 8152 PetscFunctionBegin; 8153 ierr = PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8154 /* Assign global numbering to coarse dofs */ 8155 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8156 PetscInt ocoarse_size; 8157 compute_vecs = PETSC_TRUE; 8158 8159 pcbddc->new_primal_space = PETSC_TRUE; 8160 ocoarse_size = pcbddc->coarse_size; 8161 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 8162 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 8163 /* see if we can avoid some work */ 8164 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8165 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8166 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8167 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 8168 coarse_reuse = PETSC_FALSE; 8169 } else { /* we can safely reuse already computed coarse matrix */ 8170 coarse_reuse = PETSC_TRUE; 8171 } 8172 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8173 coarse_reuse = PETSC_FALSE; 8174 } 8175 /* reset any subassembling information */ 8176 if (!coarse_reuse || pcbddc->recompute_topography) { 8177 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8178 } 8179 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8180 coarse_reuse = PETSC_TRUE; 8181 } 8182 if (coarse_reuse && pcbddc->coarse_ksp) { 8183 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 8184 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 8185 coarse_mat_reuse = MAT_REUSE_MATRIX; 8186 } else { 8187 coarse_mat = NULL; 8188 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8189 } 8190 8191 /* creates temporary l2gmap and IS for coarse indexes */ 8192 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 8193 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 8194 8195 /* creates temporary MATIS object for coarse matrix */ 8196 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 8197 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 8198 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 8199 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8200 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8201 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 8202 8203 /* count "active" (i.e. with positive local size) and "void" processes */ 8204 im_active = !!(pcis->n); 8205 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 8206 8207 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8208 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8209 /* full_restr : just use the receivers from the subassembling pattern */ 8210 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr); 8211 coarse_mat_is = NULL; 8212 multilevel_allowed = PETSC_FALSE; 8213 multilevel_requested = PETSC_FALSE; 8214 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8215 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8216 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8217 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8218 if (multilevel_requested) { 8219 ncoarse = active_procs/pcbddc->coarsening_ratio; 8220 restr = PETSC_FALSE; 8221 full_restr = PETSC_FALSE; 8222 } else { 8223 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8224 restr = PETSC_TRUE; 8225 full_restr = PETSC_TRUE; 8226 } 8227 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8228 ncoarse = PetscMax(1,ncoarse); 8229 if (!pcbddc->coarse_subassembling) { 8230 if (pcbddc->coarsening_ratio > 1) { 8231 if (multilevel_requested) { 8232 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8233 } else { 8234 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8235 } 8236 } else { 8237 PetscMPIInt rank; 8238 8239 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr); 8240 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8241 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8242 } 8243 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8244 PetscInt psum; 8245 if (pcbddc->coarse_ksp) psum = 1; 8246 else psum = 0; 8247 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 8248 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8249 } 8250 /* determine if we can go multilevel */ 8251 if (multilevel_requested) { 8252 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8253 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8254 } 8255 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8256 8257 /* dump subassembling pattern */ 8258 if (pcbddc->dbg_flag && multilevel_allowed) { 8259 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 8260 } 8261 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8262 nedcfield = -1; 8263 corners = NULL; 8264 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8265 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8266 const PetscInt *idxs; 8267 ISLocalToGlobalMapping tmap; 8268 8269 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8270 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 8271 /* allocate space for temporary storage */ 8272 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 8273 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 8274 /* allocate for IS array */ 8275 nisdofs = pcbddc->n_ISForDofsLocal; 8276 if (pcbddc->nedclocal) { 8277 if (pcbddc->nedfield > -1) { 8278 nedcfield = pcbddc->nedfield; 8279 } else { 8280 nedcfield = 0; 8281 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 8282 nisdofs = 1; 8283 } 8284 } 8285 nisneu = !!pcbddc->NeumannBoundariesLocal; 8286 nisvert = 0; /* nisvert is not used */ 8287 nis = nisdofs + nisneu + nisvert; 8288 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 8289 /* dofs splitting */ 8290 for (i=0;i<nisdofs;i++) { 8291 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 8292 if (nedcfield != i) { 8293 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 8294 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8295 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8296 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8297 } else { 8298 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 8299 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8300 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8301 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 8302 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8303 } 8304 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8305 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8306 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 8307 } 8308 /* neumann boundaries */ 8309 if (pcbddc->NeumannBoundariesLocal) { 8310 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 8311 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 8312 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8313 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8314 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8315 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8316 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 8317 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 8318 } 8319 /* coordinates */ 8320 if (pcbddc->corner_selected) { 8321 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8322 ierr = ISGetLocalSize(corners,&tsize);CHKERRQ(ierr); 8323 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8324 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8325 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout); 8326 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8327 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8328 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8329 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);CHKERRQ(ierr); 8330 } 8331 ierr = PetscFree(tidxs);CHKERRQ(ierr); 8332 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 8333 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 8334 } else { 8335 nis = 0; 8336 nisdofs = 0; 8337 nisneu = 0; 8338 nisvert = 0; 8339 isarray = NULL; 8340 } 8341 /* destroy no longer needed map */ 8342 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 8343 8344 /* subassemble */ 8345 if (multilevel_allowed) { 8346 Vec vp[1]; 8347 PetscInt nvecs = 0; 8348 PetscBool reuse,reuser; 8349 8350 if (coarse_mat) reuse = PETSC_TRUE; 8351 else reuse = PETSC_FALSE; 8352 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 8353 vp[0] = NULL; 8354 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8355 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8356 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8357 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8358 nvecs = 1; 8359 8360 if (pcbddc->divudotp) { 8361 Mat B,loc_divudotp; 8362 Vec v,p; 8363 IS dummy; 8364 PetscInt np; 8365 8366 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8367 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8368 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8369 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8370 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8371 ierr = VecSet(p,1.);CHKERRQ(ierr); 8372 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8373 ierr = VecDestroy(&p);CHKERRQ(ierr); 8374 ierr = MatDestroy(&B);CHKERRQ(ierr); 8375 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8376 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8377 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8378 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8379 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8380 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8381 ierr = VecDestroy(&v);CHKERRQ(ierr); 8382 } 8383 } 8384 if (reuser) { 8385 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8386 } else { 8387 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8388 } 8389 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8390 PetscScalar *arraym; 8391 const PetscScalar *arrayv; 8392 PetscInt nl; 8393 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8394 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8395 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8396 ierr = VecGetArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8397 ierr = PetscArraycpy(arraym,arrayv,nl);CHKERRQ(ierr); 8398 ierr = VecRestoreArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8399 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8400 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8401 } else { 8402 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8403 } 8404 } else { 8405 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8406 } 8407 if (coarse_mat_is || coarse_mat) { 8408 if (!multilevel_allowed) { 8409 ierr = MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8410 } else { 8411 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8412 if (coarse_mat_is) { 8413 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8414 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8415 coarse_mat = coarse_mat_is; 8416 } 8417 } 8418 } 8419 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8420 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8421 8422 /* create local to global scatters for coarse problem */ 8423 if (compute_vecs) { 8424 PetscInt lrows; 8425 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8426 if (coarse_mat) { 8427 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8428 } else { 8429 lrows = 0; 8430 } 8431 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8432 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8433 ierr = VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);CHKERRQ(ierr); 8434 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8435 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8436 } 8437 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8438 8439 /* set defaults for coarse KSP and PC */ 8440 if (multilevel_allowed) { 8441 coarse_ksp_type = KSPRICHARDSON; 8442 coarse_pc_type = PCBDDC; 8443 } else { 8444 coarse_ksp_type = KSPPREONLY; 8445 coarse_pc_type = PCREDUNDANT; 8446 } 8447 8448 /* print some info if requested */ 8449 if (pcbddc->dbg_flag) { 8450 if (!multilevel_allowed) { 8451 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8452 if (multilevel_requested) { 8453 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8454 } else if (pcbddc->max_levels) { 8455 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);CHKERRQ(ierr); 8456 } 8457 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8458 } 8459 } 8460 8461 /* communicate coarse discrete gradient */ 8462 coarseG = NULL; 8463 if (pcbddc->nedcG && multilevel_allowed) { 8464 MPI_Comm ccomm; 8465 if (coarse_mat) { 8466 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8467 } else { 8468 ccomm = MPI_COMM_NULL; 8469 } 8470 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8471 } 8472 8473 /* create the coarse KSP object only once with defaults */ 8474 if (coarse_mat) { 8475 PetscBool isredundant,isbddc,force,valid; 8476 PetscViewer dbg_viewer = NULL; 8477 8478 if (pcbddc->dbg_flag) { 8479 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8480 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8481 } 8482 if (!pcbddc->coarse_ksp) { 8483 char prefix[256],str_level[16]; 8484 size_t len; 8485 8486 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8487 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8488 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8489 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8490 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8491 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8492 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8493 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8494 /* TODO is this logic correct? should check for coarse_mat type */ 8495 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8496 /* prefix */ 8497 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8498 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8499 if (!pcbddc->current_level) { 8500 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8501 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8502 } else { 8503 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8504 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8505 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8506 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8507 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8508 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8509 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8510 } 8511 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8512 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8513 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8514 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8515 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8516 /* allow user customization */ 8517 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8518 /* get some info after set from options */ 8519 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8520 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8521 force = PETSC_FALSE; 8522 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8523 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8524 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8525 if (multilevel_allowed && !force && !valid) { 8526 isbddc = PETSC_TRUE; 8527 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8528 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8529 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8530 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8531 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8532 ierr = PetscObjectOptionsBegin((PetscObject)pc_temp);CHKERRQ(ierr); 8533 ierr = (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);CHKERRQ(ierr); 8534 ierr = PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);CHKERRQ(ierr); 8535 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8536 pc_temp->setfromoptionscalled++; 8537 } 8538 } 8539 } 8540 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8541 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8542 if (nisdofs) { 8543 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8544 for (i=0;i<nisdofs;i++) { 8545 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8546 } 8547 } 8548 if (nisneu) { 8549 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8550 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8551 } 8552 if (nisvert) { 8553 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8554 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8555 } 8556 if (coarseG) { 8557 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8558 } 8559 8560 /* get some info after set from options */ 8561 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8562 8563 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8564 if (isbddc && !multilevel_allowed) { 8565 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8566 } 8567 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8568 force = PETSC_FALSE; 8569 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8570 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8571 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8572 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8573 } 8574 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8575 if (isredundant) { 8576 KSP inner_ksp; 8577 PC inner_pc; 8578 8579 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8580 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8581 } 8582 8583 /* parameters which miss an API */ 8584 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8585 if (isbddc) { 8586 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8587 8588 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8589 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8590 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8591 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8592 if (pcbddc_coarse->benign_saddle_point) { 8593 Mat coarsedivudotp_is; 8594 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8595 IS row,col; 8596 const PetscInt *gidxs; 8597 PetscInt n,st,M,N; 8598 8599 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8600 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRMPI(ierr); 8601 st = st-n; 8602 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8603 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8604 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8605 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8606 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8607 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8608 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8609 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8610 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8611 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8612 ierr = ISDestroy(&row);CHKERRQ(ierr); 8613 ierr = ISDestroy(&col);CHKERRQ(ierr); 8614 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8615 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8616 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8617 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8618 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8619 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8620 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8621 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8622 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8623 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8624 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8625 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8626 } 8627 } 8628 8629 /* propagate symmetry info of coarse matrix */ 8630 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8631 if (pc->pmat->symmetric_set) { 8632 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8633 } 8634 if (pc->pmat->hermitian_set) { 8635 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8636 } 8637 if (pc->pmat->spd_set) { 8638 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8639 } 8640 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8641 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8642 } 8643 /* set operators */ 8644 ierr = MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");CHKERRQ(ierr); 8645 ierr = MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 8646 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8647 if (pcbddc->dbg_flag) { 8648 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8649 } 8650 } 8651 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8652 ierr = PetscFree(isarray);CHKERRQ(ierr); 8653 #if 0 8654 { 8655 PetscViewer viewer; 8656 char filename[256]; 8657 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8658 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8659 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8660 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8661 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8662 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8663 } 8664 #endif 8665 8666 if (corners) { 8667 Vec gv; 8668 IS is; 8669 const PetscInt *idxs; 8670 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8671 PetscScalar *coords; 8672 8673 if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8674 ierr = VecGetSize(pcbddc->coarse_vec,&N);CHKERRQ(ierr); 8675 ierr = VecGetLocalSize(pcbddc->coarse_vec,&n);CHKERRQ(ierr); 8676 ierr = VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);CHKERRQ(ierr); 8677 ierr = VecSetBlockSize(gv,cdim);CHKERRQ(ierr); 8678 ierr = VecSetSizes(gv,n*cdim,N*cdim);CHKERRQ(ierr); 8679 ierr = VecSetType(gv,VECSTANDARD);CHKERRQ(ierr); 8680 ierr = VecSetFromOptions(gv);CHKERRQ(ierr); 8681 ierr = VecSet(gv,PETSC_MAX_REAL);CHKERRQ(ierr); /* we only propagate coordinates from vertices constraints */ 8682 8683 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8684 ierr = ISGetLocalSize(is,&n);CHKERRQ(ierr); 8685 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 8686 ierr = PetscMalloc1(n*cdim,&coords);CHKERRQ(ierr); 8687 for (i=0;i<n;i++) { 8688 for (d=0;d<cdim;d++) { 8689 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8690 } 8691 } 8692 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 8693 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8694 8695 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 8696 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8697 ierr = VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);CHKERRQ(ierr); 8698 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8699 ierr = PetscFree(coords);CHKERRQ(ierr); 8700 ierr = VecAssemblyBegin(gv);CHKERRQ(ierr); 8701 ierr = VecAssemblyEnd(gv);CHKERRQ(ierr); 8702 ierr = VecGetArray(gv,&coords);CHKERRQ(ierr); 8703 if (pcbddc->coarse_ksp) { 8704 PC coarse_pc; 8705 PetscBool isbddc; 8706 8707 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 8708 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 8709 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8710 PetscReal *realcoords; 8711 8712 ierr = VecGetLocalSize(gv,&n);CHKERRQ(ierr); 8713 #if defined(PETSC_USE_COMPLEX) 8714 ierr = PetscMalloc1(n,&realcoords);CHKERRQ(ierr); 8715 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8716 #else 8717 realcoords = coords; 8718 #endif 8719 ierr = PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);CHKERRQ(ierr); 8720 #if defined(PETSC_USE_COMPLEX) 8721 ierr = PetscFree(realcoords);CHKERRQ(ierr); 8722 #endif 8723 } 8724 } 8725 ierr = VecRestoreArray(gv,&coords);CHKERRQ(ierr); 8726 ierr = VecDestroy(&gv);CHKERRQ(ierr); 8727 } 8728 ierr = ISDestroy(&corners);CHKERRQ(ierr); 8729 8730 if (pcbddc->coarse_ksp) { 8731 Vec crhs,csol; 8732 8733 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8734 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8735 if (!csol) { 8736 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8737 } 8738 if (!crhs) { 8739 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8740 } 8741 } 8742 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8743 8744 /* compute null space for coarse solver if the benign trick has been requested */ 8745 if (pcbddc->benign_null) { 8746 8747 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8748 for (i=0;i<pcbddc->benign_n;i++) { 8749 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8750 } 8751 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8752 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8753 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8754 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8755 if (coarse_mat) { 8756 Vec nullv; 8757 PetscScalar *array,*array2; 8758 PetscInt nl; 8759 8760 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8761 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8762 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8763 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8764 ierr = PetscArraycpy(array2,array,nl);CHKERRQ(ierr); 8765 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8766 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8767 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8768 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8769 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8770 } 8771 } 8772 ierr = PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8773 8774 ierr = PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8775 if (pcbddc->coarse_ksp) { 8776 PetscBool ispreonly; 8777 8778 if (CoarseNullSpace) { 8779 PetscBool isnull; 8780 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8781 if (isnull) { 8782 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8783 } 8784 /* TODO: add local nullspaces (if any) */ 8785 } 8786 /* setup coarse ksp */ 8787 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8788 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8789 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8790 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8791 KSP check_ksp; 8792 KSPType check_ksp_type; 8793 PC check_pc; 8794 Vec check_vec,coarse_vec; 8795 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8796 PetscInt its; 8797 PetscBool compute_eigs; 8798 PetscReal *eigs_r,*eigs_c; 8799 PetscInt neigs; 8800 const char *prefix; 8801 8802 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8803 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8804 ierr = PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);CHKERRQ(ierr); 8805 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 8806 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8807 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8808 /* prevent from setup unneeded object */ 8809 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8810 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8811 if (ispreonly) { 8812 check_ksp_type = KSPPREONLY; 8813 compute_eigs = PETSC_FALSE; 8814 } else { 8815 check_ksp_type = KSPGMRES; 8816 compute_eigs = PETSC_TRUE; 8817 } 8818 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8819 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8820 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8821 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8822 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8823 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8824 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8825 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8826 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8827 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8828 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8829 /* create random vec */ 8830 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8831 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8832 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8833 /* solve coarse problem */ 8834 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8835 ierr = KSPCheckSolve(check_ksp,pc,coarse_vec);CHKERRQ(ierr); 8836 /* set eigenvalue estimation if preonly has not been requested */ 8837 if (compute_eigs) { 8838 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8839 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8840 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8841 if (neigs) { 8842 lambda_max = eigs_r[neigs-1]; 8843 lambda_min = eigs_r[0]; 8844 if (pcbddc->use_coarse_estimates) { 8845 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8846 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8847 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8848 } 8849 } 8850 } 8851 } 8852 8853 /* check coarse problem residual error */ 8854 if (pcbddc->dbg_flag) { 8855 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8856 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8857 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8858 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8859 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8860 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8861 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8862 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8863 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8864 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8865 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8866 if (CoarseNullSpace) { 8867 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8868 } 8869 if (compute_eigs) { 8870 PetscReal lambda_max_s,lambda_min_s; 8871 KSPConvergedReason reason; 8872 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8873 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8874 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8875 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8876 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8877 for (i=0;i<neigs;i++) { 8878 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8879 } 8880 } 8881 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8882 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8883 } 8884 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8885 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8886 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8887 if (compute_eigs) { 8888 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8889 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8890 } 8891 } 8892 } 8893 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8894 /* print additional info */ 8895 if (pcbddc->dbg_flag) { 8896 /* waits until all processes reaches this point */ 8897 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8898 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);CHKERRQ(ierr); 8899 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8900 } 8901 8902 /* free memory */ 8903 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8904 ierr = PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8905 PetscFunctionReturn(0); 8906 } 8907 8908 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8909 { 8910 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8911 PC_IS* pcis = (PC_IS*)pc->data; 8912 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8913 IS subset,subset_mult,subset_n; 8914 PetscInt local_size,coarse_size=0; 8915 PetscInt *local_primal_indices=NULL; 8916 const PetscInt *t_local_primal_indices; 8917 PetscErrorCode ierr; 8918 8919 PetscFunctionBegin; 8920 /* Compute global number of coarse dofs */ 8921 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8922 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8923 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8924 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8925 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8926 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8927 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8928 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8929 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8930 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8931 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8932 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8933 ierr = PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);CHKERRQ(ierr); 8934 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8935 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8936 8937 /* check numbering */ 8938 if (pcbddc->dbg_flag) { 8939 PetscScalar coarsesum,*array,*array2; 8940 PetscInt i; 8941 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8942 8943 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8944 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8945 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8946 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8947 /* counter */ 8948 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8949 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8950 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8951 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8952 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8953 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8954 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8955 for (i=0;i<pcbddc->local_primal_size;i++) { 8956 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8957 } 8958 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8959 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8960 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8961 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8962 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8963 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8964 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8965 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8966 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8967 for (i=0;i<pcis->n;i++) { 8968 if (array[i] != 0.0 && array[i] != array2[i]) { 8969 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8970 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8971 set_error = PETSC_TRUE; 8972 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8973 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8974 } 8975 } 8976 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8977 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 8978 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8979 for (i=0;i<pcis->n;i++) { 8980 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8981 } 8982 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8983 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8984 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8985 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8986 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8987 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8988 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8989 PetscInt *gidxs; 8990 8991 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8992 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8993 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8994 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8995 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8996 for (i=0;i<pcbddc->local_primal_size;i++) { 8997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8998 } 8999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9000 ierr = PetscFree(gidxs);CHKERRQ(ierr); 9001 } 9002 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9003 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9004 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 9005 } 9006 9007 /* get back data */ 9008 *coarse_size_n = coarse_size; 9009 *local_primal_indices_n = local_primal_indices; 9010 PetscFunctionReturn(0); 9011 } 9012 9013 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 9014 { 9015 IS localis_t; 9016 PetscInt i,lsize,*idxs,n; 9017 PetscScalar *vals; 9018 PetscErrorCode ierr; 9019 9020 PetscFunctionBegin; 9021 /* get indices in local ordering exploiting local to global map */ 9022 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 9023 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 9024 for (i=0;i<lsize;i++) vals[i] = 1.0; 9025 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9026 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 9027 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 9028 if (idxs) { /* multilevel guard */ 9029 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 9030 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 9031 } 9032 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 9033 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9034 ierr = PetscFree(vals);CHKERRQ(ierr); 9035 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 9036 /* now compute set in local ordering */ 9037 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9038 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9039 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9040 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 9041 for (i=0,lsize=0;i<n;i++) { 9042 if (PetscRealPart(vals[i]) > 0.5) { 9043 lsize++; 9044 } 9045 } 9046 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 9047 for (i=0,lsize=0;i<n;i++) { 9048 if (PetscRealPart(vals[i]) > 0.5) { 9049 idxs[lsize++] = i; 9050 } 9051 } 9052 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9053 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 9054 *localis = localis_t; 9055 PetscFunctionReturn(0); 9056 } 9057 9058 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9059 { 9060 PC_IS *pcis=(PC_IS*)pc->data; 9061 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9062 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9063 Mat S_j; 9064 PetscInt *used_xadj,*used_adjncy; 9065 PetscBool free_used_adj; 9066 PetscErrorCode ierr; 9067 9068 PetscFunctionBegin; 9069 ierr = PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9070 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9071 free_used_adj = PETSC_FALSE; 9072 if (pcbddc->sub_schurs_layers == -1) { 9073 used_xadj = NULL; 9074 used_adjncy = NULL; 9075 } else { 9076 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9077 used_xadj = pcbddc->mat_graph->xadj; 9078 used_adjncy = pcbddc->mat_graph->adjncy; 9079 } else if (pcbddc->computed_rowadj) { 9080 used_xadj = pcbddc->mat_graph->xadj; 9081 used_adjncy = pcbddc->mat_graph->adjncy; 9082 } else { 9083 PetscBool flg_row=PETSC_FALSE; 9084 const PetscInt *xadj,*adjncy; 9085 PetscInt nvtxs; 9086 9087 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9088 if (flg_row) { 9089 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 9090 ierr = PetscArraycpy(used_xadj,xadj,nvtxs+1);CHKERRQ(ierr); 9091 ierr = PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);CHKERRQ(ierr); 9092 free_used_adj = PETSC_TRUE; 9093 } else { 9094 pcbddc->sub_schurs_layers = -1; 9095 used_xadj = NULL; 9096 used_adjncy = NULL; 9097 } 9098 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9099 } 9100 } 9101 9102 /* setup sub_schurs data */ 9103 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9104 if (!sub_schurs->schur_explicit) { 9105 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9106 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9107 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 9108 } else { 9109 Mat change = NULL; 9110 Vec scaling = NULL; 9111 IS change_primal = NULL, iP; 9112 PetscInt benign_n; 9113 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9114 PetscBool need_change = PETSC_FALSE; 9115 PetscBool discrete_harmonic = PETSC_FALSE; 9116 9117 if (!pcbddc->use_vertices && reuse_solvers) { 9118 PetscInt n_vertices; 9119 9120 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 9121 reuse_solvers = (PetscBool)!n_vertices; 9122 } 9123 if (!pcbddc->benign_change_explicit) { 9124 benign_n = pcbddc->benign_n; 9125 } else { 9126 benign_n = 0; 9127 } 9128 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9129 We need a global reduction to avoid possible deadlocks. 9130 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9131 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9132 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9133 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 9134 need_change = (PetscBool)(!need_change); 9135 } 9136 /* If the user defines additional constraints, we import them here. 9137 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 9138 if (need_change) { 9139 PC_IS *pcisf; 9140 PC_BDDC *pcbddcf; 9141 PC pcf; 9142 9143 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9144 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 9145 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 9146 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 9147 9148 /* hacks */ 9149 pcisf = (PC_IS*)pcf->data; 9150 pcisf->is_B_local = pcis->is_B_local; 9151 pcisf->vec1_N = pcis->vec1_N; 9152 pcisf->BtoNmap = pcis->BtoNmap; 9153 pcisf->n = pcis->n; 9154 pcisf->n_B = pcis->n_B; 9155 pcbddcf = (PC_BDDC*)pcf->data; 9156 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 9157 pcbddcf->mat_graph = pcbddc->mat_graph; 9158 pcbddcf->use_faces = PETSC_TRUE; 9159 pcbddcf->use_change_of_basis = PETSC_TRUE; 9160 pcbddcf->use_change_on_faces = PETSC_TRUE; 9161 pcbddcf->use_qr_single = PETSC_TRUE; 9162 pcbddcf->fake_change = PETSC_TRUE; 9163 9164 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 9165 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 9166 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 9167 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 9168 change = pcbddcf->ConstraintMatrix; 9169 pcbddcf->ConstraintMatrix = NULL; 9170 9171 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 9172 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 9173 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 9174 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 9175 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 9176 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 9177 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 9178 pcf->ops->destroy = NULL; 9179 pcf->ops->reset = NULL; 9180 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 9181 } 9182 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9183 9184 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 9185 if (iP) { 9186 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 9187 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 9188 ierr = PetscOptionsEnd();CHKERRQ(ierr); 9189 } 9190 if (discrete_harmonic) { 9191 Mat A; 9192 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 9193 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 9194 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 9195 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9196 ierr = MatDestroy(&A);CHKERRQ(ierr); 9197 } else { 9198 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9199 } 9200 ierr = MatDestroy(&change);CHKERRQ(ierr); 9201 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 9202 } 9203 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9204 9205 /* free adjacency */ 9206 if (free_used_adj) { 9207 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 9208 } 9209 ierr = PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9210 PetscFunctionReturn(0); 9211 } 9212 9213 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9214 { 9215 PC_IS *pcis=(PC_IS*)pc->data; 9216 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9217 PCBDDCGraph graph; 9218 PetscErrorCode ierr; 9219 9220 PetscFunctionBegin; 9221 /* attach interface graph for determining subsets */ 9222 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9223 IS verticesIS,verticescomm; 9224 PetscInt vsize,*idxs; 9225 9226 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9227 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 9228 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9229 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 9230 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9231 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9232 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 9233 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 9234 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 9235 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 9236 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 9237 } else { 9238 graph = pcbddc->mat_graph; 9239 } 9240 /* print some info */ 9241 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9242 IS vertices; 9243 PetscInt nv,nedges,nfaces; 9244 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 9245 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9246 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 9247 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9248 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 9249 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);CHKERRQ(ierr); 9250 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);CHKERRQ(ierr); 9251 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);CHKERRQ(ierr); 9252 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9253 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9254 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9255 } 9256 9257 /* sub_schurs init */ 9258 if (!pcbddc->sub_schurs) { 9259 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 9260 } 9261 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 9262 9263 /* free graph struct */ 9264 if (pcbddc->sub_schurs_rebuild) { 9265 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 9266 } 9267 PetscFunctionReturn(0); 9268 } 9269 9270 PetscErrorCode PCBDDCCheckOperator(PC pc) 9271 { 9272 PC_IS *pcis=(PC_IS*)pc->data; 9273 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9274 PetscErrorCode ierr; 9275 9276 PetscFunctionBegin; 9277 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9278 IS zerodiag = NULL; 9279 Mat S_j,B0_B=NULL; 9280 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9281 PetscScalar *p0_check,*array,*array2; 9282 PetscReal norm; 9283 PetscInt i; 9284 9285 /* B0 and B0_B */ 9286 if (zerodiag) { 9287 IS dummy; 9288 9289 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 9290 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 9291 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 9292 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 9293 } 9294 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9295 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 9296 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 9297 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9298 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9299 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9300 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9301 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 9302 /* S_j */ 9303 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9304 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9305 9306 /* mimic vector in \widetilde{W}_\Gamma */ 9307 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 9308 /* continuous in primal space */ 9309 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 9310 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9311 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9312 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9313 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 9314 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9315 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9316 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9317 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9318 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9319 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9320 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9321 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 9322 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 9323 9324 /* assemble rhs for coarse problem */ 9325 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9326 /* local with Schur */ 9327 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 9328 if (zerodiag) { 9329 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9330 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9331 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9332 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 9333 } 9334 /* sum on primal nodes the local contributions */ 9335 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9336 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9337 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9338 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9339 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9340 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9341 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9342 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 9343 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9344 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9345 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9346 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9347 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9348 /* scale primal nodes (BDDC sums contibutions) */ 9349 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 9350 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9351 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9352 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9353 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9354 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9355 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9356 /* global: \widetilde{B0}_B w_\Gamma */ 9357 if (zerodiag) { 9358 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 9359 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9360 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9361 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9362 } 9363 /* BDDC */ 9364 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 9365 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 9366 9367 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 9368 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 9369 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 9370 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);CHKERRQ(ierr); 9371 for (i=0;i<pcbddc->benign_n;i++) { 9372 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));CHKERRQ(ierr); 9373 } 9374 ierr = PetscFree(p0_check);CHKERRQ(ierr); 9375 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 9376 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 9377 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 9378 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9379 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 9380 } 9381 PetscFunctionReturn(0); 9382 } 9383 9384 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9385 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9386 { 9387 Mat At; 9388 IS rows; 9389 PetscInt rst,ren; 9390 PetscErrorCode ierr; 9391 PetscLayout rmap; 9392 9393 PetscFunctionBegin; 9394 rst = ren = 0; 9395 if (ccomm != MPI_COMM_NULL) { 9396 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 9397 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 9398 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 9399 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 9400 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 9401 } 9402 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 9403 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 9404 ierr = ISDestroy(&rows);CHKERRQ(ierr); 9405 9406 if (ccomm != MPI_COMM_NULL) { 9407 Mat_MPIAIJ *a,*b; 9408 IS from,to; 9409 Vec gvec; 9410 PetscInt lsize; 9411 9412 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9413 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9414 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9415 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9416 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9417 a = (Mat_MPIAIJ*)At->data; 9418 b = (Mat_MPIAIJ*)(*B)->data; 9419 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRMPI(ierr); 9420 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRMPI(ierr); 9421 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9422 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9423 b->A = a->A; 9424 b->B = a->B; 9425 9426 b->donotstash = a->donotstash; 9427 b->roworiented = a->roworiented; 9428 b->rowindices = NULL; 9429 b->rowvalues = NULL; 9430 b->getrowactive = PETSC_FALSE; 9431 9432 (*B)->rmap = rmap; 9433 (*B)->factortype = A->factortype; 9434 (*B)->assembled = PETSC_TRUE; 9435 (*B)->insertmode = NOT_SET_VALUES; 9436 (*B)->preallocated = PETSC_TRUE; 9437 9438 if (a->colmap) { 9439 #if defined(PETSC_USE_CTABLE) 9440 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9441 #else 9442 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9443 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9444 ierr = PetscArraycpy(b->colmap,a->colmap,At->cmap->N);CHKERRQ(ierr); 9445 #endif 9446 } else b->colmap = NULL; 9447 if (a->garray) { 9448 PetscInt len; 9449 len = a->B->cmap->n; 9450 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9451 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9452 if (len) { ierr = PetscArraycpy(b->garray,a->garray,len);CHKERRQ(ierr); } 9453 } else b->garray = NULL; 9454 9455 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9456 b->lvec = a->lvec; 9457 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9458 9459 /* cannot use VecScatterCopy */ 9460 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9461 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9462 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9463 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9464 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9465 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9466 ierr = ISDestroy(&from);CHKERRQ(ierr); 9467 ierr = ISDestroy(&to);CHKERRQ(ierr); 9468 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9469 } 9470 ierr = MatDestroy(&At);CHKERRQ(ierr); 9471 PetscFunctionReturn(0); 9472 } 9473