1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 PetscErrorCode ierr; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 28 if (!nr || !nc) PetscFunctionReturn(0); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 33 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr,nc); 39 if (!rwork) { 40 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 50 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 51 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 #else 55 ierr = PetscMalloc1(5*n,&rwork2);CHKERRQ(ierr); 56 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr)); 57 ierr = PetscFree(rwork2);CHKERRQ(ierr); 58 #endif 59 ierr = PetscFPTrapPop();CHKERRQ(ierr); 60 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 61 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 62 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 63 if (!rwork) { 64 ierr = PetscFree(sing);CHKERRQ(ierr); 65 } 66 if (!work) { 67 ierr = PetscFree(uwork);CHKERRQ(ierr); 68 } 69 /* create B */ 70 if (!range) { 71 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 72 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 73 ierr = PetscArraycpy(data,U+nr*i,(nr-i)*nr);CHKERRQ(ierr); 74 } else { 75 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 76 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 77 ierr = PetscArraycpy(data,U,i*nr);CHKERRQ(ierr); 78 } 79 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 80 ierr = PetscFree(U);CHKERRQ(ierr); 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 const PetscScalar *vals; 122 PetscScalar v; 123 124 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 125 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 126 ierr = MatDenseGetArrayRead(GEd,&vals);CHKERRQ(ierr); 127 /* v = PetscAbsScalar(vals[0]) */; 128 v = 1.; 129 cvals[0] = vals[0]/v; 130 cvals[1] = vals[1]/v; 131 ierr = MatDenseRestoreArrayRead(GEd,&vals);CHKERRQ(ierr); 132 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 133 #if defined(PRINT_GDET) 134 { 135 PetscViewer viewer; 136 char filename[256]; 137 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 138 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 139 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 140 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 141 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 142 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 143 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 144 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 145 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 146 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 147 } 148 #endif 149 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 150 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 151 } 152 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode PCBDDCNedelecSupport(PC pc) 157 { 158 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 159 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 160 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 161 Vec tvec; 162 PetscSF sfv; 163 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 164 MPI_Comm comm; 165 IS lned,primals,allprimals,nedfieldlocal; 166 IS *eedges,*extrows,*extcols,*alleedges; 167 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 168 PetscScalar *vals,*work; 169 PetscReal *rwork; 170 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 171 PetscInt ne,nv,Lv,order,n,field; 172 PetscInt n_neigh,*neigh,*n_shared,**shared; 173 PetscInt i,j,extmem,cum,maxsize,nee; 174 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 175 PetscInt *sfvleaves,*sfvroots; 176 PetscInt *corners,*cedges; 177 PetscInt *ecount,**eneighs,*vcount,**vneighs; 178 PetscInt *emarks; 179 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 180 PetscErrorCode ierr; 181 182 PetscFunctionBegin; 183 /* If the discrete gradient is defined for a subset of dofs and global is true, 184 it assumes G is given in global ordering for all the dofs. 185 Otherwise, the ordering is global for the Nedelec field */ 186 order = pcbddc->nedorder; 187 conforming = pcbddc->conforming; 188 field = pcbddc->nedfield; 189 global = pcbddc->nedglobal; 190 setprimal = PETSC_FALSE; 191 print = PETSC_FALSE; 192 singular = PETSC_FALSE; 193 194 /* Command line customization */ 195 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 196 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 199 /* print debug info TODO: to be removed */ 200 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 201 ierr = PetscOptionsEnd();CHKERRQ(ierr); 202 203 /* Return if there are no edges in the decomposition and the problem is not singular */ 204 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 205 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 206 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 207 if (!singular) { 208 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 209 lrc[0] = PETSC_FALSE; 210 for (i=0;i<n;i++) { 211 if (PetscRealPart(vals[i]) > 2.) { 212 lrc[0] = PETSC_TRUE; 213 break; 214 } 215 } 216 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 217 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr); 218 if (!lrc[1]) PetscFunctionReturn(0); 219 } 220 221 /* Get Nedelec field */ 222 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 223 if (pcbddc->n_ISForDofsLocal && field >= 0) { 224 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 225 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 226 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 227 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 228 ne = n; 229 nedfieldlocal = NULL; 230 global = PETSC_TRUE; 231 } else if (field == PETSC_DECIDE) { 232 PetscInt rst,ren,*idx; 233 234 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 235 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 236 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 237 for (i=rst;i<ren;i++) { 238 PetscInt nc; 239 240 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 241 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 242 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 } 244 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 245 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 246 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 247 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 248 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 249 } else { 250 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 251 } 252 253 /* Sanity checks */ 254 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 255 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 256 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 257 258 /* Just set primal dofs and return */ 259 if (setprimal) { 260 IS enedfieldlocal; 261 PetscInt *eidxs; 262 263 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 264 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 265 if (nedfieldlocal) { 266 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 267 for (i=0,cum=0;i<ne;i++) { 268 if (PetscRealPart(vals[idxs[i]]) > 2.) { 269 eidxs[cum++] = idxs[i]; 270 } 271 } 272 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 273 } else { 274 for (i=0,cum=0;i<ne;i++) { 275 if (PetscRealPart(vals[i]) > 2.) { 276 eidxs[cum++] = i; 277 } 278 } 279 } 280 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 281 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 282 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 283 ierr = PetscFree(eidxs);CHKERRQ(ierr); 284 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 285 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 /* Compute some l2g maps */ 290 if (nedfieldlocal) { 291 IS is; 292 293 /* need to map from the local Nedelec field to local numbering */ 294 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 295 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 296 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 297 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 298 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 299 if (global) { 300 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 301 el2g = al2g; 302 } else { 303 IS gis; 304 305 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 306 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 307 ierr = ISDestroy(&gis);CHKERRQ(ierr); 308 } 309 ierr = ISDestroy(&is);CHKERRQ(ierr); 310 } else { 311 /* restore default */ 312 pcbddc->nedfield = -1; 313 /* one ref for the destruction of al2g, one for el2g */ 314 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 315 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 316 el2g = al2g; 317 fl2g = NULL; 318 } 319 320 /* Start communication to drop connections for interior edges (for cc analysis only) */ 321 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 322 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 323 if (nedfieldlocal) { 324 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 325 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 326 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 } else { 328 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 329 } 330 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 331 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 332 333 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 334 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 335 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 336 if (global) { 337 PetscInt rst; 338 339 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 340 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 341 if (matis->sf_rootdata[i] < 2) { 342 matis->sf_rootdata[cum++] = i + rst; 343 } 344 } 345 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 346 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 347 } else { 348 PetscInt *tbz; 349 350 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 351 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 352 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 353 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 354 for (i=0,cum=0;i<ne;i++) 355 if (matis->sf_leafdata[idxs[i]] == 1) 356 tbz[cum++] = i; 357 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 358 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 359 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 360 ierr = PetscFree(tbz);CHKERRQ(ierr); 361 } 362 } else { /* we need the entire G to infer the nullspace */ 363 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 364 G = pcbddc->discretegradient; 365 } 366 367 /* Extract subdomain relevant rows of G */ 368 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 369 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 370 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 371 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 372 ierr = ISDestroy(&lned);CHKERRQ(ierr); 373 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 374 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 375 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 376 377 /* SF for nodal dofs communications */ 378 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 379 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 380 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 382 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 384 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 386 i = singular ? 2 : 1; 387 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 388 389 /* Destroy temporary G created in MATIS format and modified G */ 390 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 391 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 392 ierr = MatDestroy(&G);CHKERRQ(ierr); 393 394 if (print) { 395 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 396 ierr = MatView(lG,NULL);CHKERRQ(ierr); 397 } 398 399 /* Save lG for values insertion in change of basis */ 400 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 401 402 /* Analyze the edge-nodes connections (duplicate lG) */ 403 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 404 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 405 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 409 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 410 /* need to import the boundary specification to ensure the 411 proper detection of coarse edges' endpoints */ 412 if (pcbddc->DirichletBoundariesLocal) { 413 IS is; 414 415 if (fl2g) { 416 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 417 } else { 418 is = pcbddc->DirichletBoundariesLocal; 419 } 420 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 421 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 422 for (i=0;i<cum;i++) { 423 if (idxs[i] >= 0) { 424 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 425 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 426 } 427 } 428 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 429 if (fl2g) { 430 ierr = ISDestroy(&is);CHKERRQ(ierr); 431 } 432 } 433 if (pcbddc->NeumannBoundariesLocal) { 434 IS is; 435 436 if (fl2g) { 437 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 438 } else { 439 is = pcbddc->NeumannBoundariesLocal; 440 } 441 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 442 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 443 for (i=0;i<cum;i++) { 444 if (idxs[i] >= 0) { 445 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 446 } 447 } 448 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 449 if (fl2g) { 450 ierr = ISDestroy(&is);CHKERRQ(ierr); 451 } 452 } 453 454 /* Count neighs per dof */ 455 ierr = ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 456 ierr = ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 457 458 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 459 for proper detection of coarse edges' endpoints */ 460 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 461 for (i=0;i<ne;i++) { 462 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 463 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 467 if (!conforming) { 468 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 469 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 470 } 471 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 472 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 473 cum = 0; 474 for (i=0;i<ne;i++) { 475 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 476 if (!PetscBTLookup(btee,i)) { 477 marks[cum++] = i; 478 continue; 479 } 480 /* set badly connected edge dofs as primal */ 481 if (!conforming) { 482 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 483 marks[cum++] = i; 484 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 485 for (j=ii[i];j<ii[i+1];j++) { 486 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 487 } 488 } else { 489 /* every edge dofs should be connected trough a certain number of nodal dofs 490 to other edge dofs belonging to coarse edges 491 - at most 2 endpoints 492 - order-1 interior nodal dofs 493 - no undefined nodal dofs (nconn < order) 494 */ 495 PetscInt ends = 0,ints = 0, undef = 0; 496 for (j=ii[i];j<ii[i+1];j++) { 497 PetscInt v = jj[j],k; 498 PetscInt nconn = iit[v+1]-iit[v]; 499 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 500 if (nconn > order) ends++; 501 else if (nconn == order) ints++; 502 else undef++; 503 } 504 if (undef || ends > 2 || ints != order -1) { 505 marks[cum++] = i; 506 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 507 for (j=ii[i];j<ii[i+1];j++) { 508 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 509 } 510 } 511 } 512 } 513 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 514 if (!order && ii[i+1] != ii[i]) { 515 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 516 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 517 } 518 } 519 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 520 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 521 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 if (!conforming) { 523 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 524 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 525 } 526 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 527 528 /* identify splitpoints and corner candidates */ 529 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 530 if (print) { 531 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 532 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 533 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 534 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 535 } 536 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 537 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 538 for (i=0;i<nv;i++) { 539 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 540 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 541 if (!order) { /* variable order */ 542 PetscReal vorder = 0.; 543 544 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 545 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 546 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 547 ord = 1; 548 } 549 if (PetscUnlikelyDebug(test%ord)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord); 550 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 551 if (PetscBTLookup(btbd,jj[j])) { 552 bdir = PETSC_TRUE; 553 break; 554 } 555 if (vc != ecount[jj[j]]) { 556 sneighs = PETSC_FALSE; 557 } else { 558 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 559 for (k=0;k<vc;k++) { 560 if (vn[k] != en[k]) { 561 sneighs = PETSC_FALSE; 562 break; 563 } 564 } 565 } 566 } 567 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 568 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 569 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 570 } else if (test == ord) { 571 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 572 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 573 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 574 } else { 575 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 576 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 577 } 578 } 579 } 580 ierr = ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 581 ierr = ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 582 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 583 584 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 585 if (order != 1) { 586 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 587 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 if (PetscBTLookup(btvcand,i)) { 590 PetscBool found = PETSC_FALSE; 591 for (j=ii[i];j<ii[i+1] && !found;j++) { 592 PetscInt k,e = jj[j]; 593 if (PetscBTLookup(bte,e)) continue; 594 for (k=iit[e];k<iit[e+1];k++) { 595 PetscInt v = jjt[k]; 596 if (v != i && PetscBTLookup(btvcand,v)) { 597 found = PETSC_TRUE; 598 break; 599 } 600 } 601 } 602 if (!found) { 603 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 604 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 605 } else { 606 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 607 } 608 } 609 } 610 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 611 } 612 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 613 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 614 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 615 616 /* Get the local G^T explicitly */ 617 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 618 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 619 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 620 621 /* Mark interior nodal dofs */ 622 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 623 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 624 for (i=1;i<n_neigh;i++) { 625 for (j=0;j<n_shared[i];j++) { 626 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 627 } 628 } 629 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 630 631 /* communicate corners and splitpoints */ 632 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 633 ierr = PetscArrayzero(sfvleaves,nv);CHKERRQ(ierr); 634 ierr = PetscArrayzero(sfvroots,Lv);CHKERRQ(ierr); 635 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 636 637 if (print) { 638 IS tbz; 639 640 cum = 0; 641 for (i=0;i<nv;i++) 642 if (sfvleaves[i]) 643 vmarks[cum++] = i; 644 645 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 646 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 647 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 648 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 649 } 650 651 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 652 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 653 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);CHKERRQ(ierr); 654 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);CHKERRQ(ierr); 655 656 /* Zero rows of lGt corresponding to identified corners 657 and interior nodal dofs */ 658 cum = 0; 659 for (i=0;i<nv;i++) { 660 if (sfvleaves[i]) { 661 vmarks[cum++] = i; 662 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 663 } 664 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 665 } 666 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 667 if (print) { 668 IS tbz; 669 670 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 671 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 672 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 673 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 674 } 675 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 676 ierr = PetscFree(vmarks);CHKERRQ(ierr); 677 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 678 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 679 680 /* Recompute G */ 681 ierr = MatDestroy(&lG);CHKERRQ(ierr); 682 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 683 if (print) { 684 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 685 ierr = MatView(lG,NULL);CHKERRQ(ierr); 686 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 687 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 688 } 689 690 /* Get primal dofs (if any) */ 691 cum = 0; 692 for (i=0;i<ne;i++) { 693 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 694 } 695 if (fl2g) { 696 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 697 } 698 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 699 if (print) { 700 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 701 ierr = ISView(primals,NULL);CHKERRQ(ierr); 702 } 703 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 704 /* TODO: what if the user passed in some of them ? */ 705 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 706 ierr = ISDestroy(&primals);CHKERRQ(ierr); 707 708 /* Compute edge connectivity */ 709 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 710 711 /* Symbolic conn = lG*lGt */ 712 ierr = MatProductCreate(lG,lGt,NULL,&conn);CHKERRQ(ierr); 713 ierr = MatProductSetType(conn,MATPRODUCT_AB);CHKERRQ(ierr); 714 ierr = MatProductSetAlgorithm(conn,"default");CHKERRQ(ierr); 715 ierr = MatProductSetFill(conn,PETSC_DEFAULT);CHKERRQ(ierr); 716 ierr = PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_");CHKERRQ(ierr); 717 ierr = MatProductSetFromOptions(conn);CHKERRQ(ierr); 718 ierr = MatProductSymbolic(conn);CHKERRQ(ierr); 719 720 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 721 if (fl2g) { 722 PetscBT btf; 723 PetscInt *iia,*jja,*iiu,*jju; 724 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 725 726 /* create CSR for all local dofs */ 727 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 728 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 729 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 730 iiu = pcbddc->mat_graph->xadj; 731 jju = pcbddc->mat_graph->adjncy; 732 } else if (pcbddc->use_local_adj) { 733 rest = PETSC_TRUE; 734 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 735 } else { 736 free = PETSC_TRUE; 737 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 738 iiu[0] = 0; 739 for (i=0;i<n;i++) { 740 iiu[i+1] = i+1; 741 jju[i] = -1; 742 } 743 } 744 745 /* import sizes of CSR */ 746 iia[0] = 0; 747 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 748 749 /* overwrite entries corresponding to the Nedelec field */ 750 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 751 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 752 for (i=0;i<ne;i++) { 753 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 754 iia[idxs[i]+1] = ii[i+1]-ii[i]; 755 } 756 757 /* iia in CSR */ 758 for (i=0;i<n;i++) iia[i+1] += iia[i]; 759 760 /* jja in CSR */ 761 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 762 for (i=0;i<n;i++) 763 if (!PetscBTLookup(btf,i)) 764 for (j=0;j<iiu[i+1]-iiu[i];j++) 765 jja[iia[i]+j] = jju[iiu[i]+j]; 766 767 /* map edge dofs connectivity */ 768 if (jj) { 769 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 770 for (i=0;i<ne;i++) { 771 PetscInt e = idxs[i]; 772 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 773 } 774 } 775 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 776 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 777 if (rest) { 778 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 779 } 780 if (free) { 781 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 782 } 783 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 784 } else { 785 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 786 } 787 788 /* Analyze interface for edge dofs */ 789 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 790 pcbddc->mat_graph->twodim = PETSC_FALSE; 791 792 /* Get coarse edges in the edge space */ 793 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 794 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 795 796 if (fl2g) { 797 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 798 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 799 for (i=0;i<nee;i++) { 800 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 801 } 802 } else { 803 eedges = alleedges; 804 primals = allprimals; 805 } 806 807 /* Mark fine edge dofs with their coarse edge id */ 808 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 809 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 810 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 811 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 812 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 813 if (print) { 814 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 815 ierr = ISView(primals,NULL);CHKERRQ(ierr); 816 } 817 818 maxsize = 0; 819 for (i=0;i<nee;i++) { 820 PetscInt size,mark = i+1; 821 822 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 823 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 824 for (j=0;j<size;j++) marks[idxs[j]] = mark; 825 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 826 maxsize = PetscMax(maxsize,size); 827 } 828 829 /* Find coarse edge endpoints */ 830 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 831 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 832 for (i=0;i<nee;i++) { 833 PetscInt mark = i+1,size; 834 835 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 836 if (!size && nedfieldlocal) continue; 837 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 838 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 839 if (print) { 840 ierr = PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);CHKERRQ(ierr); 841 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 842 } 843 for (j=0;j<size;j++) { 844 PetscInt k, ee = idxs[j]; 845 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 846 for (k=ii[ee];k<ii[ee+1];k++) { 847 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 848 if (PetscBTLookup(btv,jj[k])) { 849 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 850 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 851 PetscInt k2; 852 PetscBool corner = PETSC_FALSE; 853 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 854 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 855 /* it's a corner if either is connected with an edge dof belonging to a different cc or 856 if the edge dof lie on the natural part of the boundary */ 857 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 858 corner = PETSC_TRUE; 859 break; 860 } 861 } 862 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 863 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 864 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 865 } else { 866 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 867 } 868 } 869 } 870 } 871 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 872 } 873 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 874 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 875 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 876 877 /* Reset marked primal dofs */ 878 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 879 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 880 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 881 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 882 883 /* Now use the initial lG */ 884 ierr = MatDestroy(&lG);CHKERRQ(ierr); 885 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 886 lG = lGinit; 887 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 888 889 /* Compute extended cols indices */ 890 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 891 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 892 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 893 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 894 i *= maxsize; 895 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 896 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 897 eerr = PETSC_FALSE; 898 for (i=0;i<nee;i++) { 899 PetscInt size,found = 0; 900 901 cum = 0; 902 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 903 if (!size && nedfieldlocal) continue; 904 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 905 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 906 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 907 for (j=0;j<size;j++) { 908 PetscInt k,ee = idxs[j]; 909 for (k=ii[ee];k<ii[ee+1];k++) { 910 PetscInt vv = jj[k]; 911 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 912 else if (!PetscBTLookupSet(btvc,vv)) found++; 913 } 914 } 915 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 916 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 917 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 918 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 919 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 920 /* it may happen that endpoints are not defined at this point 921 if it is the case, mark this edge for a second pass */ 922 if (cum != size -1 || found != 2) { 923 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 924 if (print) { 925 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 926 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 927 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 928 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 929 } 930 eerr = PETSC_TRUE; 931 } 932 } 933 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 934 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr); 935 if (done) { 936 PetscInt *newprimals; 937 938 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 939 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 940 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 941 ierr = PetscArraycpy(newprimals,idxs,cum);CHKERRQ(ierr); 942 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 944 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 945 for (i=0;i<nee;i++) { 946 PetscBool has_candidates = PETSC_FALSE; 947 if (PetscBTLookup(bter,i)) { 948 PetscInt size,mark = i+1; 949 950 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 951 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 952 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 953 for (j=0;j<size;j++) { 954 PetscInt k,ee = idxs[j]; 955 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 956 for (k=ii[ee];k<ii[ee+1];k++) { 957 /* set all candidates located on the edge as corners */ 958 if (PetscBTLookup(btvcand,jj[k])) { 959 PetscInt k2,vv = jj[k]; 960 has_candidates = PETSC_TRUE; 961 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 962 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 963 /* set all edge dofs connected to candidate as primals */ 964 for (k2=iit[vv];k2<iit[vv+1];k2++) { 965 if (marks[jjt[k2]] == mark) { 966 PetscInt k3,ee2 = jjt[k2]; 967 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 968 newprimals[cum++] = ee2; 969 /* finally set the new corners */ 970 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 971 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 972 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 973 } 974 } 975 } 976 } else { 977 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 978 } 979 } 980 } 981 if (!has_candidates) { /* circular edge */ 982 PetscInt k, ee = idxs[0],*tmarks; 983 984 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 985 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 986 for (k=ii[ee];k<ii[ee+1];k++) { 987 PetscInt k2; 988 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 989 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 990 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 991 } 992 for (j=0;j<size;j++) { 993 if (tmarks[idxs[j]] > 1) { 994 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 995 newprimals[cum++] = idxs[j]; 996 } 997 } 998 ierr = PetscFree(tmarks);CHKERRQ(ierr); 999 } 1000 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1001 } 1002 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1003 } 1004 ierr = PetscFree(extcols);CHKERRQ(ierr); 1005 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1006 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1007 if (fl2g) { 1008 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1009 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1010 for (i=0;i<nee;i++) { 1011 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1012 } 1013 ierr = PetscFree(eedges);CHKERRQ(ierr); 1014 } 1015 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1016 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1017 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1018 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1019 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1020 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1021 pcbddc->mat_graph->twodim = PETSC_FALSE; 1022 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1023 if (fl2g) { 1024 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1025 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1026 for (i=0;i<nee;i++) { 1027 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1028 } 1029 } else { 1030 eedges = alleedges; 1031 primals = allprimals; 1032 } 1033 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1034 1035 /* Mark again */ 1036 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 1037 for (i=0;i<nee;i++) { 1038 PetscInt size,mark = i+1; 1039 1040 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1041 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1042 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1043 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1044 } 1045 if (print) { 1046 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1047 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1048 } 1049 1050 /* Recompute extended cols */ 1051 eerr = PETSC_FALSE; 1052 for (i=0;i<nee;i++) { 1053 PetscInt size; 1054 1055 cum = 0; 1056 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1057 if (!size && nedfieldlocal) continue; 1058 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1059 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1060 for (j=0;j<size;j++) { 1061 PetscInt k,ee = idxs[j]; 1062 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1063 } 1064 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1065 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1066 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1067 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1068 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1069 if (cum != size -1) { 1070 if (print) { 1071 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1072 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1073 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1074 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1075 } 1076 eerr = PETSC_TRUE; 1077 } 1078 } 1079 } 1080 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1081 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1082 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1083 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1084 /* an error should not occur at this point */ 1085 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1086 1087 /* Check the number of endpoints */ 1088 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1089 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1090 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1091 for (i=0;i<nee;i++) { 1092 PetscInt size, found = 0, gc[2]; 1093 1094 /* init with defaults */ 1095 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1096 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1097 if (!size && nedfieldlocal) continue; 1098 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1099 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1100 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1101 for (j=0;j<size;j++) { 1102 PetscInt k,ee = idxs[j]; 1103 for (k=ii[ee];k<ii[ee+1];k++) { 1104 PetscInt vv = jj[k]; 1105 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1106 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1107 corners[i*2+found++] = vv; 1108 } 1109 } 1110 } 1111 if (found != 2) { 1112 PetscInt e; 1113 if (fl2g) { 1114 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1115 } else { 1116 e = idxs[0]; 1117 } 1118 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1119 } 1120 1121 /* get primal dof index on this coarse edge */ 1122 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1123 if (gc[0] > gc[1]) { 1124 PetscInt swap = corners[2*i]; 1125 corners[2*i] = corners[2*i+1]; 1126 corners[2*i+1] = swap; 1127 } 1128 cedges[i] = idxs[size-1]; 1129 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1130 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1131 } 1132 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1134 1135 if (PetscDefined(USE_DEBUG)) { 1136 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1137 not interfere with neighbouring coarse edges */ 1138 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1139 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 for (i=0;i<nv;i++) { 1141 PetscInt emax = 0,eemax = 0; 1142 1143 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1144 ierr = PetscArrayzero(emarks,nee+1);CHKERRQ(ierr); 1145 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1146 for (j=1;j<nee+1;j++) { 1147 if (emax < emarks[j]) { 1148 emax = emarks[j]; 1149 eemax = j; 1150 } 1151 } 1152 /* not relevant for edges */ 1153 if (!eemax) continue; 1154 1155 for (j=ii[i];j<ii[i+1];j++) { 1156 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1157 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1158 } 1159 } 1160 } 1161 ierr = PetscFree(emarks);CHKERRQ(ierr); 1162 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1163 } 1164 1165 /* Compute extended rows indices for edge blocks of the change of basis */ 1166 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1167 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1168 extmem *= maxsize; 1169 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1170 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1171 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1172 for (i=0;i<nv;i++) { 1173 PetscInt mark = 0,size,start; 1174 1175 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1176 for (j=ii[i];j<ii[i+1];j++) 1177 if (marks[jj[j]] && !mark) 1178 mark = marks[jj[j]]; 1179 1180 /* not relevant */ 1181 if (!mark) continue; 1182 1183 /* import extended row */ 1184 mark--; 1185 start = mark*extmem+extrowcum[mark]; 1186 size = ii[i+1]-ii[i]; 1187 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1188 ierr = PetscArraycpy(extrow+start,jj+ii[i],size);CHKERRQ(ierr); 1189 extrowcum[mark] += size; 1190 } 1191 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1192 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1193 ierr = PetscFree(marks);CHKERRQ(ierr); 1194 1195 /* Compress extrows */ 1196 cum = 0; 1197 for (i=0;i<nee;i++) { 1198 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1199 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1200 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1201 cum = PetscMax(cum,size); 1202 } 1203 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1204 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1205 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1206 1207 /* Workspace for lapack inner calls and VecSetValues */ 1208 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1209 1210 /* Create change of basis matrix (preallocation can be improved) */ 1211 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1212 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1213 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1214 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1215 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1216 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1217 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1218 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1219 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1220 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1221 1222 /* Defaults to identity */ 1223 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1224 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1225 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1226 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1227 1228 /* Create discrete gradient for the coarser level if needed */ 1229 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1230 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1231 if (pcbddc->current_level < pcbddc->max_levels) { 1232 ISLocalToGlobalMapping cel2g,cvl2g; 1233 IS wis,gwis; 1234 PetscInt cnv,cne; 1235 1236 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1237 if (fl2g) { 1238 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1239 } else { 1240 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1241 pcbddc->nedclocal = wis; 1242 } 1243 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1244 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1245 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1246 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1247 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1248 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1249 1250 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1251 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1252 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1253 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1254 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1255 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1256 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1257 1258 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1259 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1260 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1261 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1262 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1263 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1264 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1265 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1266 } 1267 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1268 1269 #if defined(PRINT_GDET) 1270 inc = 0; 1271 lev = pcbddc->current_level; 1272 #endif 1273 1274 /* Insert values in the change of basis matrix */ 1275 for (i=0;i<nee;i++) { 1276 Mat Gins = NULL, GKins = NULL; 1277 IS cornersis = NULL; 1278 PetscScalar cvals[2]; 1279 1280 if (pcbddc->nedcG) { 1281 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1282 } 1283 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1284 if (Gins && GKins) { 1285 const PetscScalar *data; 1286 const PetscInt *rows,*cols; 1287 PetscInt nrh,nch,nrc,ncc; 1288 1289 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1290 /* H1 */ 1291 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1292 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1293 ierr = MatDenseGetArrayRead(Gins,&data);CHKERRQ(ierr); 1294 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1295 ierr = MatDenseRestoreArrayRead(Gins,&data);CHKERRQ(ierr); 1296 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1297 /* complement */ 1298 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1299 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1300 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1301 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1302 ierr = MatDenseGetArrayRead(GKins,&data);CHKERRQ(ierr); 1303 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1304 ierr = MatDenseRestoreArrayRead(GKins,&data);CHKERRQ(ierr); 1305 1306 /* coarse discrete gradient */ 1307 if (pcbddc->nedcG) { 1308 PetscInt cols[2]; 1309 1310 cols[0] = 2*i; 1311 cols[1] = 2*i+1; 1312 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1313 } 1314 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1315 } 1316 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1317 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1318 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1319 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1320 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1321 } 1322 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1323 1324 /* Start assembling */ 1325 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1326 if (pcbddc->nedcG) { 1327 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1328 } 1329 1330 /* Free */ 1331 if (fl2g) { 1332 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1333 for (i=0;i<nee;i++) { 1334 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1335 } 1336 ierr = PetscFree(eedges);CHKERRQ(ierr); 1337 } 1338 1339 /* hack mat_graph with primal dofs on the coarse edges */ 1340 { 1341 PCBDDCGraph graph = pcbddc->mat_graph; 1342 PetscInt *oqueue = graph->queue; 1343 PetscInt *ocptr = graph->cptr; 1344 PetscInt ncc,*idxs; 1345 1346 /* find first primal edge */ 1347 if (pcbddc->nedclocal) { 1348 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1349 } else { 1350 if (fl2g) { 1351 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1352 } 1353 idxs = cedges; 1354 } 1355 cum = 0; 1356 while (cum < nee && cedges[cum] < 0) cum++; 1357 1358 /* adapt connected components */ 1359 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1360 graph->cptr[0] = 0; 1361 for (i=0,ncc=0;i<graph->ncc;i++) { 1362 PetscInt lc = ocptr[i+1]-ocptr[i]; 1363 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1364 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1365 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1366 ncc++; 1367 lc--; 1368 cum++; 1369 while (cum < nee && cedges[cum] < 0) cum++; 1370 } 1371 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1372 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1373 ncc++; 1374 } 1375 graph->ncc = ncc; 1376 if (pcbddc->nedclocal) { 1377 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1378 } 1379 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1380 } 1381 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1382 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1384 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1385 1386 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1387 ierr = PetscFree(extrow);CHKERRQ(ierr); 1388 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1389 ierr = PetscFree(corners);CHKERRQ(ierr); 1390 ierr = PetscFree(cedges);CHKERRQ(ierr); 1391 ierr = PetscFree(extrows);CHKERRQ(ierr); 1392 ierr = PetscFree(extcols);CHKERRQ(ierr); 1393 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1394 1395 /* Complete assembling */ 1396 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1397 if (pcbddc->nedcG) { 1398 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1399 #if 0 1400 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1401 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1402 #endif 1403 } 1404 1405 /* set change of basis */ 1406 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1407 ierr = MatDestroy(&T);CHKERRQ(ierr); 1408 1409 PetscFunctionReturn(0); 1410 } 1411 1412 /* the near-null space of BDDC carries information on quadrature weights, 1413 and these can be collinear -> so cheat with MatNullSpaceCreate 1414 and create a suitable set of basis vectors first */ 1415 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1416 { 1417 PetscErrorCode ierr; 1418 PetscInt i; 1419 1420 PetscFunctionBegin; 1421 for (i=0;i<nvecs;i++) { 1422 PetscInt first,last; 1423 1424 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1425 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1426 if (i>=first && i < last) { 1427 PetscScalar *data; 1428 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1429 if (!has_const) { 1430 data[i-first] = 1.; 1431 } else { 1432 data[2*i-first] = 1./PetscSqrtReal(2.); 1433 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1434 } 1435 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1436 } 1437 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1438 } 1439 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1440 for (i=0;i<nvecs;i++) { /* reset vectors */ 1441 PetscInt first,last; 1442 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1443 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1444 if (i>=first && i < last) { 1445 PetscScalar *data; 1446 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1447 if (!has_const) { 1448 data[i-first] = 0.; 1449 } else { 1450 data[2*i-first] = 0.; 1451 data[2*i-first+1] = 0.; 1452 } 1453 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1454 } 1455 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1456 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1457 } 1458 PetscFunctionReturn(0); 1459 } 1460 1461 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1462 { 1463 Mat loc_divudotp; 1464 Vec p,v,vins,quad_vec,*quad_vecs; 1465 ISLocalToGlobalMapping map; 1466 PetscScalar *vals; 1467 const PetscScalar *array; 1468 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1469 PetscInt n_neigh,*neigh,*n_shared,**shared; 1470 PetscMPIInt rank; 1471 PetscErrorCode ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1475 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1476 ierr = MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 1477 if (!maxneighs) { 1478 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1479 *nnsp = NULL; 1480 PetscFunctionReturn(0); 1481 } 1482 maxsize = 0; 1483 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1484 ierr = PetscMalloc2(maxsize,&gidxs,maxsize,&vals);CHKERRQ(ierr); 1485 /* create vectors to hold quadrature weights */ 1486 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1487 if (!transpose) { 1488 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1489 } else { 1490 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1491 } 1492 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1493 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1494 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1495 for (i=0;i<maxneighs;i++) { 1496 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1497 } 1498 1499 /* compute local quad vec */ 1500 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1501 if (!transpose) { 1502 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1503 } else { 1504 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1505 } 1506 ierr = VecSet(p,1.);CHKERRQ(ierr); 1507 if (!transpose) { 1508 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1509 } else { 1510 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1511 } 1512 if (vl2l) { 1513 Mat lA; 1514 VecScatter sc; 1515 1516 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1517 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1518 ierr = VecScatterCreate(v,NULL,vins,vl2l,&sc);CHKERRQ(ierr); 1519 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1522 } else { 1523 vins = v; 1524 } 1525 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1526 ierr = VecDestroy(&p);CHKERRQ(ierr); 1527 1528 /* insert in global quadrature vecs */ 1529 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 1530 for (i=1;i<n_neigh;i++) { 1531 const PetscInt *idxs; 1532 PetscInt idx,nn,j; 1533 1534 idxs = shared[i]; 1535 nn = n_shared[i]; 1536 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1537 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1538 idx = -(idx+1); 1539 if (idx < 0 || idx >= maxneighs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %D not in [0,%D)",idx,maxneighs); 1540 ierr = ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);CHKERRQ(ierr); 1541 ierr = VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1542 } 1543 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1544 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1545 if (vl2l) { 1546 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1547 } 1548 ierr = VecDestroy(&v);CHKERRQ(ierr); 1549 ierr = PetscFree2(gidxs,vals);CHKERRQ(ierr); 1550 1551 /* assemble near null space */ 1552 for (i=0;i<maxneighs;i++) { 1553 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1554 } 1555 for (i=0;i<maxneighs;i++) { 1556 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1557 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1558 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1559 } 1560 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1561 PetscFunctionReturn(0); 1562 } 1563 1564 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1565 { 1566 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1567 PetscErrorCode ierr; 1568 1569 PetscFunctionBegin; 1570 if (primalv) { 1571 if (pcbddc->user_primal_vertices_local) { 1572 IS list[2], newp; 1573 1574 list[0] = primalv; 1575 list[1] = pcbddc->user_primal_vertices_local; 1576 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1577 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1578 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1579 pcbddc->user_primal_vertices_local = newp; 1580 } else { 1581 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1582 } 1583 } 1584 PetscFunctionReturn(0); 1585 } 1586 1587 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1588 { 1589 PetscInt f, *comp = (PetscInt *)ctx; 1590 1591 PetscFunctionBegin; 1592 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1593 PetscFunctionReturn(0); 1594 } 1595 1596 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1597 { 1598 PetscErrorCode ierr; 1599 Vec local,global; 1600 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1601 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1602 PetscBool monolithic = PETSC_FALSE; 1603 1604 PetscFunctionBegin; 1605 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1606 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1607 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1608 /* need to convert from global to local topology information and remove references to information in global ordering */ 1609 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1610 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1611 ierr = VecBindToCPU(global,PETSC_TRUE);CHKERRQ(ierr); 1612 ierr = VecBindToCPU(local,PETSC_TRUE);CHKERRQ(ierr); 1613 if (monolithic) { /* just get block size to properly compute vertices */ 1614 if (pcbddc->vertex_size == 1) { 1615 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1616 } 1617 goto boundary; 1618 } 1619 1620 if (pcbddc->user_provided_isfordofs) { 1621 if (pcbddc->n_ISForDofs) { 1622 PetscInt i; 1623 1624 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1625 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1626 PetscInt bs; 1627 1628 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1629 ierr = ISGetBlockSize(pcbddc->ISForDofs[i],&bs);CHKERRQ(ierr); 1630 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1631 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1632 } 1633 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1634 pcbddc->n_ISForDofs = 0; 1635 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1636 } 1637 } else { 1638 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1639 DM dm; 1640 1641 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1642 if (!dm) { 1643 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1644 } 1645 if (dm) { 1646 IS *fields; 1647 PetscInt nf,i; 1648 1649 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1650 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1651 for (i=0;i<nf;i++) { 1652 PetscInt bs; 1653 1654 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1655 ierr = ISGetBlockSize(fields[i],&bs);CHKERRQ(ierr); 1656 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1657 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1658 } 1659 ierr = PetscFree(fields);CHKERRQ(ierr); 1660 pcbddc->n_ISForDofsLocal = nf; 1661 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1662 PetscContainer c; 1663 1664 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1665 if (c) { 1666 MatISLocalFields lf; 1667 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1668 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1669 } else { /* fallback, create the default fields if bs > 1 */ 1670 PetscInt i, n = matis->A->rmap->n; 1671 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1672 if (i > 1) { 1673 pcbddc->n_ISForDofsLocal = i; 1674 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1675 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1676 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1677 } 1678 } 1679 } 1680 } 1681 } else { 1682 PetscInt i; 1683 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1684 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1685 } 1686 } 1687 } 1688 1689 boundary: 1690 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1691 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1692 } else if (pcbddc->DirichletBoundariesLocal) { 1693 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1694 } 1695 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1696 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1697 } else if (pcbddc->NeumannBoundariesLocal) { 1698 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1699 } 1700 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1701 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1702 } 1703 ierr = VecDestroy(&global);CHKERRQ(ierr); 1704 ierr = VecDestroy(&local);CHKERRQ(ierr); 1705 /* detect local disconnected subdomains if requested (use matis->A) */ 1706 if (pcbddc->detect_disconnected) { 1707 IS primalv = NULL; 1708 PetscInt i; 1709 PetscBool filter = pcbddc->detect_disconnected_filter; 1710 1711 for (i=0;i<pcbddc->n_local_subs;i++) { 1712 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1713 } 1714 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1715 ierr = PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1716 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1717 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1718 } 1719 /* early stage corner detection */ 1720 { 1721 DM dm; 1722 1723 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1724 if (!dm) { 1725 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1726 } 1727 if (dm) { 1728 PetscBool isda; 1729 1730 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1731 if (isda) { 1732 ISLocalToGlobalMapping l2l; 1733 IS corners; 1734 Mat lA; 1735 PetscBool gl,lo; 1736 1737 { 1738 Vec cvec; 1739 const PetscScalar *coords; 1740 PetscInt dof,n,cdim; 1741 PetscBool memc = PETSC_TRUE; 1742 1743 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1744 ierr = DMGetCoordinates(dm,&cvec);CHKERRQ(ierr); 1745 ierr = VecGetLocalSize(cvec,&n);CHKERRQ(ierr); 1746 ierr = VecGetBlockSize(cvec,&cdim);CHKERRQ(ierr); 1747 n /= cdim; 1748 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 1749 ierr = PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);CHKERRQ(ierr); 1750 ierr = VecGetArrayRead(cvec,&coords);CHKERRQ(ierr); 1751 #if defined(PETSC_USE_COMPLEX) 1752 memc = PETSC_FALSE; 1753 #endif 1754 if (dof != 1) memc = PETSC_FALSE; 1755 if (memc) { 1756 ierr = PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);CHKERRQ(ierr); 1757 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1758 PetscReal *bcoords = pcbddc->mat_graph->coords; 1759 PetscInt i, b, d; 1760 1761 for (i=0;i<n;i++) { 1762 for (b=0;b<dof;b++) { 1763 for (d=0;d<cdim;d++) { 1764 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1765 } 1766 } 1767 } 1768 } 1769 ierr = VecRestoreArrayRead(cvec,&coords);CHKERRQ(ierr); 1770 pcbddc->mat_graph->cdim = cdim; 1771 pcbddc->mat_graph->cnloc = dof*n; 1772 pcbddc->mat_graph->cloc = PETSC_FALSE; 1773 } 1774 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1775 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1776 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1777 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1778 lo = (PetscBool)(l2l && corners); 1779 ierr = MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 1780 if (gl) { /* From PETSc's DMDA */ 1781 const PetscInt *idx; 1782 PetscInt dof,bs,*idxout,n; 1783 1784 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1785 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1786 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1787 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1788 if (bs == dof) { 1789 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1790 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1791 } else { /* the original DMDA local-to-local map have been modified */ 1792 PetscInt i,d; 1793 1794 ierr = PetscMalloc1(dof*n,&idxout);CHKERRQ(ierr); 1795 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1796 ierr = ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);CHKERRQ(ierr); 1797 1798 bs = 1; 1799 n *= dof; 1800 } 1801 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1802 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1803 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1804 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1805 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1806 pcbddc->corner_selected = PETSC_TRUE; 1807 pcbddc->corner_selection = PETSC_TRUE; 1808 } 1809 if (corners) { 1810 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1811 } 1812 } 1813 } 1814 } 1815 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1816 DM dm; 1817 1818 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1819 if (!dm) { 1820 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1821 } 1822 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1823 Vec vcoords; 1824 PetscSection section; 1825 PetscReal *coords; 1826 PetscInt d,cdim,nl,nf,**ctxs; 1827 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1828 1829 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1830 ierr = DMGetLocalSection(dm,§ion);CHKERRQ(ierr); 1831 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1832 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1833 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1834 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1835 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1836 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1837 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1838 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1839 for (d=0;d<cdim;d++) { 1840 PetscInt i; 1841 const PetscScalar *v; 1842 1843 for (i=0;i<nf;i++) ctxs[i][0] = d; 1844 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1845 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1846 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1847 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1848 } 1849 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1850 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1851 ierr = PetscFree(coords);CHKERRQ(ierr); 1852 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1853 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1854 } 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1860 { 1861 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1862 PetscErrorCode ierr; 1863 IS nis; 1864 const PetscInt *idxs; 1865 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1866 1867 PetscFunctionBegin; 1868 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1869 if (mop == MPI_LAND) { 1870 /* init rootdata with true */ 1871 for (i=0;i<pc->pmat->rmap->n;i++) matis->sf_rootdata[i] = 1; 1872 } else { 1873 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 1874 } 1875 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 1876 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1877 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1878 for (i=0;i<nd;i++) 1879 if (-1 < idxs[i] && idxs[i] < n) 1880 matis->sf_leafdata[idxs[i]] = 1; 1881 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1882 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1883 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1884 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 1885 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);CHKERRQ(ierr); 1886 if (mop == MPI_LAND) { 1887 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1888 } else { 1889 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1890 } 1891 for (i=0,nnd=0;i<n;i++) 1892 if (matis->sf_leafdata[i]) 1893 nidxs[nnd++] = i; 1894 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1895 ierr = ISDestroy(is);CHKERRQ(ierr); 1896 *is = nis; 1897 PetscFunctionReturn(0); 1898 } 1899 1900 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1901 { 1902 PC_IS *pcis = (PC_IS*)(pc->data); 1903 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 if (!pcbddc->benign_have_null) { 1908 PetscFunctionReturn(0); 1909 } 1910 if (pcbddc->ChangeOfBasisMatrix) { 1911 Vec swap; 1912 1913 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1914 swap = pcbddc->work_change; 1915 pcbddc->work_change = r; 1916 r = swap; 1917 } 1918 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1919 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1920 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);CHKERRQ(ierr); 1921 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1922 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);CHKERRQ(ierr); 1923 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 1924 ierr = VecSet(z,0.);CHKERRQ(ierr); 1925 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1926 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1927 if (pcbddc->ChangeOfBasisMatrix) { 1928 pcbddc->work_change = r; 1929 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1930 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1931 } 1932 PetscFunctionReturn(0); 1933 } 1934 1935 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1936 { 1937 PCBDDCBenignMatMult_ctx ctx; 1938 PetscErrorCode ierr; 1939 PetscBool apply_right,apply_left,reset_x; 1940 1941 PetscFunctionBegin; 1942 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1943 if (transpose) { 1944 apply_right = ctx->apply_left; 1945 apply_left = ctx->apply_right; 1946 } else { 1947 apply_right = ctx->apply_right; 1948 apply_left = ctx->apply_left; 1949 } 1950 reset_x = PETSC_FALSE; 1951 if (apply_right) { 1952 const PetscScalar *ax; 1953 PetscInt nl,i; 1954 1955 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1956 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1957 ierr = PetscArraycpy(ctx->work,ax,nl);CHKERRQ(ierr); 1958 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1959 for (i=0;i<ctx->benign_n;i++) { 1960 PetscScalar sum,val; 1961 const PetscInt *idxs; 1962 PetscInt nz,j; 1963 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1964 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1965 sum = 0.; 1966 if (ctx->apply_p0) { 1967 val = ctx->work[idxs[nz-1]]; 1968 for (j=0;j<nz-1;j++) { 1969 sum += ctx->work[idxs[j]]; 1970 ctx->work[idxs[j]] += val; 1971 } 1972 } else { 1973 for (j=0;j<nz-1;j++) { 1974 sum += ctx->work[idxs[j]]; 1975 } 1976 } 1977 ctx->work[idxs[nz-1]] -= sum; 1978 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1979 } 1980 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1981 reset_x = PETSC_TRUE; 1982 } 1983 if (transpose) { 1984 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1985 } else { 1986 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1987 } 1988 if (reset_x) { 1989 ierr = VecResetArray(x);CHKERRQ(ierr); 1990 } 1991 if (apply_left) { 1992 PetscScalar *ay; 1993 PetscInt i; 1994 1995 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1996 for (i=0;i<ctx->benign_n;i++) { 1997 PetscScalar sum,val; 1998 const PetscInt *idxs; 1999 PetscInt nz,j; 2000 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2001 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2002 val = -ay[idxs[nz-1]]; 2003 if (ctx->apply_p0) { 2004 sum = 0.; 2005 for (j=0;j<nz-1;j++) { 2006 sum += ay[idxs[j]]; 2007 ay[idxs[j]] += val; 2008 } 2009 ay[idxs[nz-1]] += sum; 2010 } else { 2011 for (j=0;j<nz-1;j++) { 2012 ay[idxs[j]] += val; 2013 } 2014 ay[idxs[nz-1]] = 0.; 2015 } 2016 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2017 } 2018 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 2019 } 2020 PetscFunctionReturn(0); 2021 } 2022 2023 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2024 { 2025 PetscErrorCode ierr; 2026 2027 PetscFunctionBegin; 2028 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2029 PetscFunctionReturn(0); 2030 } 2031 2032 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2033 { 2034 PetscErrorCode ierr; 2035 2036 PetscFunctionBegin; 2037 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2038 PetscFunctionReturn(0); 2039 } 2040 2041 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2042 { 2043 PC_IS *pcis = (PC_IS*)pc->data; 2044 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2045 PCBDDCBenignMatMult_ctx ctx; 2046 PetscErrorCode ierr; 2047 2048 PetscFunctionBegin; 2049 if (!restore) { 2050 Mat A_IB,A_BI; 2051 PetscScalar *work; 2052 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2053 2054 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2055 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2056 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2057 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2058 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2059 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2060 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2061 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2062 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2063 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2064 ctx->apply_left = PETSC_TRUE; 2065 ctx->apply_right = PETSC_FALSE; 2066 ctx->apply_p0 = PETSC_FALSE; 2067 ctx->benign_n = pcbddc->benign_n; 2068 if (reuse) { 2069 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2070 ctx->free = PETSC_FALSE; 2071 } else { /* TODO: could be optimized for successive solves */ 2072 ISLocalToGlobalMapping N_to_D; 2073 PetscInt i; 2074 2075 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2076 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2077 for (i=0;i<pcbddc->benign_n;i++) { 2078 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2079 } 2080 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2081 ctx->free = PETSC_TRUE; 2082 } 2083 ctx->A = pcis->A_IB; 2084 ctx->work = work; 2085 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2086 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2087 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2088 pcis->A_IB = A_IB; 2089 2090 /* A_BI as A_IB^T */ 2091 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2092 pcbddc->benign_original_mat = pcis->A_BI; 2093 pcis->A_BI = A_BI; 2094 } else { 2095 if (!pcbddc->benign_original_mat) { 2096 PetscFunctionReturn(0); 2097 } 2098 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2099 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2100 pcis->A_IB = ctx->A; 2101 ctx->A = NULL; 2102 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2103 pcis->A_BI = pcbddc->benign_original_mat; 2104 pcbddc->benign_original_mat = NULL; 2105 if (ctx->free) { 2106 PetscInt i; 2107 for (i=0;i<ctx->benign_n;i++) { 2108 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2109 } 2110 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2111 } 2112 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2113 ierr = PetscFree(ctx);CHKERRQ(ierr); 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 /* used just in bddc debug mode */ 2119 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2120 { 2121 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2122 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2123 Mat An; 2124 PetscErrorCode ierr; 2125 2126 PetscFunctionBegin; 2127 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2128 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2129 if (is1) { 2130 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2131 ierr = MatDestroy(&An);CHKERRQ(ierr); 2132 } else { 2133 *B = An; 2134 } 2135 PetscFunctionReturn(0); 2136 } 2137 2138 /* TODO: add reuse flag */ 2139 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2140 { 2141 Mat Bt; 2142 PetscScalar *a,*bdata; 2143 const PetscInt *ii,*ij; 2144 PetscInt m,n,i,nnz,*bii,*bij; 2145 PetscBool flg_row; 2146 PetscErrorCode ierr; 2147 2148 PetscFunctionBegin; 2149 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2150 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2151 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2152 nnz = n; 2153 for (i=0;i<ii[n];i++) { 2154 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2155 } 2156 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2157 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2158 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2159 nnz = 0; 2160 bii[0] = 0; 2161 for (i=0;i<n;i++) { 2162 PetscInt j; 2163 for (j=ii[i];j<ii[i+1];j++) { 2164 PetscScalar entry = a[j]; 2165 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2166 bij[nnz] = ij[j]; 2167 bdata[nnz] = entry; 2168 nnz++; 2169 } 2170 } 2171 bii[i+1] = nnz; 2172 } 2173 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2174 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2175 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2176 { 2177 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2178 b->free_a = PETSC_TRUE; 2179 b->free_ij = PETSC_TRUE; 2180 } 2181 if (*B == A) { 2182 ierr = MatDestroy(&A);CHKERRQ(ierr); 2183 } 2184 *B = Bt; 2185 PetscFunctionReturn(0); 2186 } 2187 2188 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2189 { 2190 Mat B = NULL; 2191 DM dm; 2192 IS is_dummy,*cc_n; 2193 ISLocalToGlobalMapping l2gmap_dummy; 2194 PCBDDCGraph graph; 2195 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2196 PetscInt i,n; 2197 PetscInt *xadj,*adjncy; 2198 PetscBool isplex = PETSC_FALSE; 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 if (ncc) *ncc = 0; 2203 if (cc) *cc = NULL; 2204 if (primalv) *primalv = NULL; 2205 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2206 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2207 if (!dm) { 2208 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2209 } 2210 if (dm) { 2211 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2212 } 2213 if (filter) isplex = PETSC_FALSE; 2214 2215 if (isplex) { /* this code has been modified from plexpartition.c */ 2216 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2217 PetscInt *adj = NULL; 2218 IS cellNumbering; 2219 const PetscInt *cellNum; 2220 PetscBool useCone, useClosure; 2221 PetscSection section; 2222 PetscSegBuffer adjBuffer; 2223 PetscSF sfPoint; 2224 PetscErrorCode ierr; 2225 2226 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2227 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2228 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2229 /* Build adjacency graph via a section/segbuffer */ 2230 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2231 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2232 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2233 /* Always use FVM adjacency to create partitioner graph */ 2234 ierr = DMGetBasicAdjacency(dm, &useCone, &useClosure);CHKERRQ(ierr); 2235 ierr = DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);CHKERRQ(ierr); 2236 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2237 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2238 for (n = 0, p = pStart; p < pEnd; p++) { 2239 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2240 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2241 adjSize = PETSC_DETERMINE; 2242 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2243 for (a = 0; a < adjSize; ++a) { 2244 const PetscInt point = adj[a]; 2245 if (pStart <= point && point < pEnd) { 2246 PetscInt *PETSC_RESTRICT pBuf; 2247 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2248 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2249 *pBuf = point; 2250 } 2251 } 2252 n++; 2253 } 2254 ierr = DMSetBasicAdjacency(dm, useCone, useClosure);CHKERRQ(ierr); 2255 /* Derive CSR graph from section/segbuffer */ 2256 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2257 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2258 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2259 for (idx = 0, p = pStart; p < pEnd; p++) { 2260 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2261 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2262 } 2263 xadj[n] = size; 2264 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2265 /* Clean up */ 2266 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2267 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2268 ierr = PetscFree(adj);CHKERRQ(ierr); 2269 graph->xadj = xadj; 2270 graph->adjncy = adjncy; 2271 } else { 2272 Mat A; 2273 PetscBool isseqaij, flg_row; 2274 2275 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2276 if (!A->rmap->N || !A->cmap->N) { 2277 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2278 PetscFunctionReturn(0); 2279 } 2280 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2281 if (!isseqaij && filter) { 2282 PetscBool isseqdense; 2283 2284 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2285 if (!isseqdense) { 2286 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2287 } else { /* TODO: rectangular case and LDA */ 2288 PetscScalar *array; 2289 PetscReal chop=1.e-6; 2290 2291 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2292 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2293 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2294 for (i=0;i<n;i++) { 2295 PetscInt j; 2296 for (j=i+1;j<n;j++) { 2297 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2298 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2299 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2300 } 2301 } 2302 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2303 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2304 } 2305 } else { 2306 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2307 B = A; 2308 } 2309 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2310 2311 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2312 if (filter) { 2313 PetscScalar *data; 2314 PetscInt j,cum; 2315 2316 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2317 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2318 cum = 0; 2319 for (i=0;i<n;i++) { 2320 PetscInt t; 2321 2322 for (j=xadj[i];j<xadj[i+1];j++) { 2323 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2324 continue; 2325 } 2326 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2327 } 2328 t = xadj_filtered[i]; 2329 xadj_filtered[i] = cum; 2330 cum += t; 2331 } 2332 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2333 graph->xadj = xadj_filtered; 2334 graph->adjncy = adjncy_filtered; 2335 } else { 2336 graph->xadj = xadj; 2337 graph->adjncy = adjncy; 2338 } 2339 } 2340 /* compute local connected components using PCBDDCGraph */ 2341 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2342 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2343 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2344 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2345 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2346 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2347 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2348 2349 /* partial clean up */ 2350 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2351 if (B) { 2352 PetscBool flg_row; 2353 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2354 ierr = MatDestroy(&B);CHKERRQ(ierr); 2355 } 2356 if (isplex) { 2357 ierr = PetscFree(xadj);CHKERRQ(ierr); 2358 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2359 } 2360 2361 /* get back data */ 2362 if (isplex) { 2363 if (ncc) *ncc = graph->ncc; 2364 if (cc || primalv) { 2365 Mat A; 2366 PetscBT btv,btvt; 2367 PetscSection subSection; 2368 PetscInt *ids,cum,cump,*cids,*pids; 2369 2370 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2371 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2372 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2373 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2374 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2375 2376 cids[0] = 0; 2377 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2378 PetscInt j; 2379 2380 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2381 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2382 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2383 2384 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2385 for (k = 0; k < 2*size; k += 2) { 2386 PetscInt s, pp, p = closure[k], off, dof, cdof; 2387 2388 ierr = PetscSectionGetConstraintDof(subSection,p,&cdof);CHKERRQ(ierr); 2389 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2390 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2391 for (s = 0; s < dof-cdof; s++) { 2392 if (PetscBTLookupSet(btvt,off+s)) continue; 2393 if (!PetscBTLookup(btv,off+s)) { 2394 ids[cum++] = off+s; 2395 } else { /* cross-vertex */ 2396 pids[cump++] = off+s; 2397 } 2398 } 2399 ierr = DMPlexGetTreeParent(dm,p,&pp,NULL);CHKERRQ(ierr); 2400 if (pp != p) { 2401 ierr = PetscSectionGetConstraintDof(subSection,pp,&cdof);CHKERRQ(ierr); 2402 ierr = PetscSectionGetOffset(subSection,pp,&off);CHKERRQ(ierr); 2403 ierr = PetscSectionGetDof(subSection,pp,&dof);CHKERRQ(ierr); 2404 for (s = 0; s < dof-cdof; s++) { 2405 if (PetscBTLookupSet(btvt,off+s)) continue; 2406 if (!PetscBTLookup(btv,off+s)) { 2407 ids[cum++] = off+s; 2408 } else { /* cross-vertex */ 2409 pids[cump++] = off+s; 2410 } 2411 } 2412 } 2413 } 2414 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2415 } 2416 cids[i+1] = cum; 2417 /* mark dofs as already assigned */ 2418 for (j = cids[i]; j < cids[i+1]; j++) { 2419 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2420 } 2421 } 2422 if (cc) { 2423 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2424 for (i = 0; i < graph->ncc; i++) { 2425 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2426 } 2427 *cc = cc_n; 2428 } 2429 if (primalv) { 2430 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2431 } 2432 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2433 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2434 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2435 } 2436 } else { 2437 if (ncc) *ncc = graph->ncc; 2438 if (cc) { 2439 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2440 for (i=0;i<graph->ncc;i++) { 2441 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2442 } 2443 *cc = cc_n; 2444 } 2445 } 2446 /* clean up graph */ 2447 graph->xadj = NULL; 2448 graph->adjncy = NULL; 2449 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2454 { 2455 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2456 PC_IS* pcis = (PC_IS*)(pc->data); 2457 IS dirIS = NULL; 2458 PetscInt i; 2459 PetscErrorCode ierr; 2460 2461 PetscFunctionBegin; 2462 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2463 if (zerodiag) { 2464 Mat A; 2465 Vec vec3_N; 2466 PetscScalar *vals; 2467 const PetscInt *idxs; 2468 PetscInt nz,*count; 2469 2470 /* p0 */ 2471 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2472 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2473 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2474 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2475 for (i=0;i<nz;i++) vals[i] = 1.; 2476 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2477 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2478 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2479 /* v_I */ 2480 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2481 for (i=0;i<nz;i++) vals[i] = 0.; 2482 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2483 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2484 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2485 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2486 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2487 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2488 if (dirIS) { 2489 PetscInt n; 2490 2491 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2492 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2493 for (i=0;i<n;i++) vals[i] = 0.; 2494 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2495 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2496 } 2497 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2498 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2499 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2500 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2501 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2502 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2503 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2504 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2505 ierr = PetscFree(vals);CHKERRQ(ierr); 2506 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2507 2508 /* there should not be any pressure dofs lying on the interface */ 2509 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2510 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2511 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2512 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2513 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2514 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2515 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2516 ierr = PetscFree(count);CHKERRQ(ierr); 2517 } 2518 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2519 2520 /* check PCBDDCBenignGetOrSetP0 */ 2521 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2522 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2523 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2524 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2525 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2526 for (i=0;i<pcbddc->benign_n;i++) { 2527 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2528 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2529 } 2530 PetscFunctionReturn(0); 2531 } 2532 2533 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2534 { 2535 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2536 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2537 PetscInt nz,n,benign_n,bsp = 1; 2538 PetscInt *interior_dofs,n_interior_dofs,nneu; 2539 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2540 PetscErrorCode ierr; 2541 2542 PetscFunctionBegin; 2543 if (reuse) goto project_b0; 2544 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2545 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2546 for (n=0;n<pcbddc->benign_n;n++) { 2547 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2548 } 2549 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2550 has_null_pressures = PETSC_TRUE; 2551 have_null = PETSC_TRUE; 2552 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2553 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2554 Checks if all the pressure dofs in each subdomain have a zero diagonal 2555 If not, a change of basis on pressures is not needed 2556 since the local Schur complements are already SPD 2557 */ 2558 if (pcbddc->n_ISForDofsLocal) { 2559 IS iP = NULL; 2560 PetscInt p,*pp; 2561 PetscBool flg; 2562 2563 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);CHKERRQ(ierr); 2564 n = pcbddc->n_ISForDofsLocal; 2565 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2566 ierr = PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);CHKERRQ(ierr); 2567 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2568 if (!flg) { 2569 n = 1; 2570 pp[0] = pcbddc->n_ISForDofsLocal-1; 2571 } 2572 2573 bsp = 0; 2574 for (p=0;p<n;p++) { 2575 PetscInt bs; 2576 2577 if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]); 2578 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2579 bsp += bs; 2580 } 2581 ierr = PetscMalloc1(bsp,&bzerodiag);CHKERRQ(ierr); 2582 bsp = 0; 2583 for (p=0;p<n;p++) { 2584 const PetscInt *idxs; 2585 PetscInt b,bs,npl,*bidxs; 2586 2587 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2588 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);CHKERRQ(ierr); 2589 ierr = ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2590 ierr = PetscMalloc1(npl/bs,&bidxs);CHKERRQ(ierr); 2591 for (b=0;b<bs;b++) { 2592 PetscInt i; 2593 2594 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2595 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);CHKERRQ(ierr); 2596 bsp++; 2597 } 2598 ierr = PetscFree(bidxs);CHKERRQ(ierr); 2599 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2600 } 2601 ierr = ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);CHKERRQ(ierr); 2602 2603 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2604 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2605 if (iP) { 2606 IS newpressures; 2607 2608 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2609 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2610 pressures = newpressures; 2611 } 2612 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2613 if (!sorted) { 2614 ierr = ISSort(pressures);CHKERRQ(ierr); 2615 } 2616 ierr = PetscFree(pp);CHKERRQ(ierr); 2617 } 2618 2619 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2620 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2621 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2622 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2623 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2624 if (!sorted) { 2625 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2626 } 2627 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2628 zerodiag_save = zerodiag; 2629 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2630 if (!nz) { 2631 if (n) have_null = PETSC_FALSE; 2632 has_null_pressures = PETSC_FALSE; 2633 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2634 } 2635 recompute_zerodiag = PETSC_FALSE; 2636 2637 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2638 zerodiag_subs = NULL; 2639 benign_n = 0; 2640 n_interior_dofs = 0; 2641 interior_dofs = NULL; 2642 nneu = 0; 2643 if (pcbddc->NeumannBoundariesLocal) { 2644 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2645 } 2646 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2647 if (checkb) { /* need to compute interior nodes */ 2648 PetscInt n,i,j; 2649 PetscInt n_neigh,*neigh,*n_shared,**shared; 2650 PetscInt *iwork; 2651 2652 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2653 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2654 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2655 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2656 for (i=1;i<n_neigh;i++) 2657 for (j=0;j<n_shared[i];j++) 2658 iwork[shared[i][j]] += 1; 2659 for (i=0;i<n;i++) 2660 if (!iwork[i]) 2661 interior_dofs[n_interior_dofs++] = i; 2662 ierr = PetscFree(iwork);CHKERRQ(ierr); 2663 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2664 } 2665 if (has_null_pressures) { 2666 IS *subs; 2667 PetscInt nsubs,i,j,nl; 2668 const PetscInt *idxs; 2669 PetscScalar *array; 2670 Vec *work; 2671 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2672 2673 subs = pcbddc->local_subs; 2674 nsubs = pcbddc->n_local_subs; 2675 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2676 if (checkb) { 2677 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2678 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2679 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2680 /* work[0] = 1_p */ 2681 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2682 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2683 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2684 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2685 /* work[0] = 1_v */ 2686 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2687 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2688 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2689 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2690 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2691 } 2692 2693 if (nsubs > 1 || bsp > 1) { 2694 IS *is; 2695 PetscInt b,totb; 2696 2697 totb = bsp; 2698 is = bsp > 1 ? bzerodiag : &zerodiag; 2699 nsubs = PetscMax(nsubs,1); 2700 ierr = PetscCalloc1(nsubs*totb,&zerodiag_subs);CHKERRQ(ierr); 2701 for (b=0;b<totb;b++) { 2702 for (i=0;i<nsubs;i++) { 2703 ISLocalToGlobalMapping l2g; 2704 IS t_zerodiag_subs; 2705 PetscInt nl; 2706 2707 if (subs) { 2708 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2709 } else { 2710 IS tis; 2711 2712 ierr = MatGetLocalSize(pcbddc->local_mat,&nl,NULL);CHKERRQ(ierr); 2713 ierr = ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);CHKERRQ(ierr); 2714 ierr = ISLocalToGlobalMappingCreateIS(tis,&l2g);CHKERRQ(ierr); 2715 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2716 } 2717 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);CHKERRQ(ierr); 2718 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2719 if (nl) { 2720 PetscBool valid = PETSC_TRUE; 2721 2722 if (checkb) { 2723 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2724 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2725 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2726 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2727 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2728 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2729 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2730 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2731 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2732 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2733 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2734 for (j=0;j<n_interior_dofs;j++) { 2735 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2736 valid = PETSC_FALSE; 2737 break; 2738 } 2739 } 2740 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2741 } 2742 if (valid && nneu) { 2743 const PetscInt *idxs; 2744 PetscInt nzb; 2745 2746 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2747 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2748 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2749 if (nzb) valid = PETSC_FALSE; 2750 } 2751 if (valid && pressures) { 2752 IS t_pressure_subs,tmp; 2753 PetscInt i1,i2; 2754 2755 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2756 ierr = ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);CHKERRQ(ierr); 2757 ierr = ISGetLocalSize(tmp,&i1);CHKERRQ(ierr); 2758 ierr = ISGetLocalSize(t_zerodiag_subs,&i2);CHKERRQ(ierr); 2759 if (i2 != i1) valid = PETSC_FALSE; 2760 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2761 ierr = ISDestroy(&tmp);CHKERRQ(ierr); 2762 } 2763 if (valid) { 2764 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);CHKERRQ(ierr); 2765 benign_n++; 2766 } else recompute_zerodiag = PETSC_TRUE; 2767 } 2768 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2769 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2770 } 2771 } 2772 } else { /* there's just one subdomain (or zero if they have not been detected */ 2773 PetscBool valid = PETSC_TRUE; 2774 2775 if (nneu) valid = PETSC_FALSE; 2776 if (valid && pressures) { 2777 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2778 } 2779 if (valid && checkb) { 2780 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2781 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2782 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2783 for (j=0;j<n_interior_dofs;j++) { 2784 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2785 valid = PETSC_FALSE; 2786 break; 2787 } 2788 } 2789 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2790 } 2791 if (valid) { 2792 benign_n = 1; 2793 ierr = PetscMalloc1(benign_n,&zerodiag_subs);CHKERRQ(ierr); 2794 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2795 zerodiag_subs[0] = zerodiag; 2796 } 2797 } 2798 if (checkb) { 2799 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2800 } 2801 } 2802 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2803 2804 if (!benign_n) { 2805 PetscInt n; 2806 2807 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2808 recompute_zerodiag = PETSC_FALSE; 2809 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2810 if (n) have_null = PETSC_FALSE; 2811 } 2812 2813 /* final check for null pressures */ 2814 if (zerodiag && pressures) { 2815 ierr = ISEqual(pressures,zerodiag,&have_null);CHKERRQ(ierr); 2816 } 2817 2818 if (recompute_zerodiag) { 2819 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2820 if (benign_n == 1) { 2821 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2822 zerodiag = zerodiag_subs[0]; 2823 } else { 2824 PetscInt i,nzn,*new_idxs; 2825 2826 nzn = 0; 2827 for (i=0;i<benign_n;i++) { 2828 PetscInt ns; 2829 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2830 nzn += ns; 2831 } 2832 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2833 nzn = 0; 2834 for (i=0;i<benign_n;i++) { 2835 PetscInt ns,*idxs; 2836 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2837 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2838 ierr = PetscArraycpy(new_idxs+nzn,idxs,ns);CHKERRQ(ierr); 2839 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2840 nzn += ns; 2841 } 2842 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2843 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2844 } 2845 have_null = PETSC_FALSE; 2846 } 2847 2848 /* determines if the coarse solver will be singular or not */ 2849 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 2850 2851 /* Prepare matrix to compute no-net-flux */ 2852 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2853 Mat A,loc_divudotp; 2854 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2855 IS row,col,isused = NULL; 2856 PetscInt M,N,n,st,n_isused; 2857 2858 if (pressures) { 2859 isused = pressures; 2860 } else { 2861 isused = zerodiag_save; 2862 } 2863 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2864 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2865 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2866 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2867 n_isused = 0; 2868 if (isused) { 2869 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2870 } 2871 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 2872 st = st-n_isused; 2873 if (n) { 2874 const PetscInt *gidxs; 2875 2876 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2877 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2878 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2879 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2880 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2881 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2882 } else { 2883 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2884 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2885 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2886 } 2887 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2888 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2889 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2890 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2891 ierr = ISDestroy(&row);CHKERRQ(ierr); 2892 ierr = ISDestroy(&col);CHKERRQ(ierr); 2893 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2894 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2895 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2896 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2897 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2898 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2899 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2900 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2901 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2902 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2903 } 2904 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2905 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2906 if (bzerodiag) { 2907 PetscInt i; 2908 2909 for (i=0;i<bsp;i++) { 2910 ierr = ISDestroy(&bzerodiag[i]);CHKERRQ(ierr); 2911 } 2912 ierr = PetscFree(bzerodiag);CHKERRQ(ierr); 2913 } 2914 pcbddc->benign_n = benign_n; 2915 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2916 2917 /* determines if the problem has subdomains with 0 pressure block */ 2918 have_null = (PetscBool)(!!pcbddc->benign_n); 2919 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 2920 2921 project_b0: 2922 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2923 /* change of basis and p0 dofs */ 2924 if (pcbddc->benign_n) { 2925 PetscInt i,s,*nnz; 2926 2927 /* local change of basis for pressures */ 2928 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2929 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2930 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2931 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2932 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2933 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2934 for (i=0;i<pcbddc->benign_n;i++) { 2935 const PetscInt *idxs; 2936 PetscInt nzs,j; 2937 2938 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2939 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2940 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2941 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2942 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2943 } 2944 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2945 ierr = MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2946 ierr = PetscFree(nnz);CHKERRQ(ierr); 2947 /* set identity by default */ 2948 for (i=0;i<n;i++) { 2949 ierr = MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);CHKERRQ(ierr); 2950 } 2951 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2952 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2953 /* set change on pressures */ 2954 for (s=0;s<pcbddc->benign_n;s++) { 2955 PetscScalar *array; 2956 const PetscInt *idxs; 2957 PetscInt nzs; 2958 2959 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2960 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2961 for (i=0;i<nzs-1;i++) { 2962 PetscScalar vals[2]; 2963 PetscInt cols[2]; 2964 2965 cols[0] = idxs[i]; 2966 cols[1] = idxs[nzs-1]; 2967 vals[0] = 1.; 2968 vals[1] = 1.; 2969 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2970 } 2971 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2972 for (i=0;i<nzs-1;i++) array[i] = -1.; 2973 array[nzs-1] = 1.; 2974 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2975 /* store local idxs for p0 */ 2976 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2977 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2978 ierr = PetscFree(array);CHKERRQ(ierr); 2979 } 2980 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2981 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2982 2983 /* project if needed */ 2984 if (pcbddc->benign_change_explicit) { 2985 Mat M; 2986 2987 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2988 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2989 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2990 ierr = MatDestroy(&M);CHKERRQ(ierr); 2991 } 2992 /* store global idxs for p0 */ 2993 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2994 } 2995 *zerodiaglocal = zerodiag; 2996 PetscFunctionReturn(0); 2997 } 2998 2999 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3000 { 3001 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3002 PetscScalar *array; 3003 PetscErrorCode ierr; 3004 3005 PetscFunctionBegin; 3006 if (!pcbddc->benign_sf) { 3007 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 3008 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 3009 } 3010 if (get) { 3011 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3012 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);CHKERRQ(ierr); 3013 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);CHKERRQ(ierr); 3014 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3015 } else { 3016 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 3017 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);CHKERRQ(ierr); 3018 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);CHKERRQ(ierr); 3019 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 3020 } 3021 PetscFunctionReturn(0); 3022 } 3023 3024 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3025 { 3026 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3027 PetscErrorCode ierr; 3028 3029 PetscFunctionBegin; 3030 /* TODO: add error checking 3031 - avoid nested pop (or push) calls. 3032 - cannot push before pop. 3033 - cannot call this if pcbddc->local_mat is NULL 3034 */ 3035 if (!pcbddc->benign_n) { 3036 PetscFunctionReturn(0); 3037 } 3038 if (pop) { 3039 if (pcbddc->benign_change_explicit) { 3040 IS is_p0; 3041 MatReuse reuse; 3042 3043 /* extract B_0 */ 3044 reuse = MAT_INITIAL_MATRIX; 3045 if (pcbddc->benign_B0) { 3046 reuse = MAT_REUSE_MATRIX; 3047 } 3048 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 3049 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 3050 /* remove rows and cols from local problem */ 3051 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 3052 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3053 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 3054 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3055 } else { 3056 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3057 PetscScalar *vals; 3058 PetscInt i,n,*idxs_ins; 3059 3060 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 3061 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 3062 if (!pcbddc->benign_B0) { 3063 PetscInt *nnz; 3064 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 3065 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 3066 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3067 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 3068 for (i=0;i<pcbddc->benign_n;i++) { 3069 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 3070 nnz[i] = n - nnz[i]; 3071 } 3072 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 3073 ierr = MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3074 ierr = PetscFree(nnz);CHKERRQ(ierr); 3075 } 3076 3077 for (i=0;i<pcbddc->benign_n;i++) { 3078 PetscScalar *array; 3079 PetscInt *idxs,j,nz,cum; 3080 3081 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 3082 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3083 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3084 for (j=0;j<nz;j++) vals[j] = 1.; 3085 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3086 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 3087 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 3088 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 3089 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 3090 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3091 cum = 0; 3092 for (j=0;j<n;j++) { 3093 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3094 vals[cum] = array[j]; 3095 idxs_ins[cum] = j; 3096 cum++; 3097 } 3098 } 3099 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3100 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3101 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3102 } 3103 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3104 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3105 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3106 } 3107 } else { /* push */ 3108 if (pcbddc->benign_change_explicit) { 3109 PetscInt i; 3110 3111 for (i=0;i<pcbddc->benign_n;i++) { 3112 PetscScalar *B0_vals; 3113 PetscInt *B0_cols,B0_ncol; 3114 3115 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3116 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3117 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3118 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3119 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3120 } 3121 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3122 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3123 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3124 } 3125 PetscFunctionReturn(0); 3126 } 3127 3128 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3129 { 3130 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3131 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3132 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3133 PetscBLASInt *B_iwork,*B_ifail; 3134 PetscScalar *work,lwork; 3135 PetscScalar *St,*S,*eigv; 3136 PetscScalar *Sarray,*Starray; 3137 PetscReal *eigs,thresh,lthresh,uthresh; 3138 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3139 PetscBool allocated_S_St; 3140 #if defined(PETSC_USE_COMPLEX) 3141 PetscReal *rwork; 3142 #endif 3143 PetscErrorCode ierr; 3144 3145 PetscFunctionBegin; 3146 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3147 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3148 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3149 ierr = PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3150 3151 if (pcbddc->dbg_flag) { 3152 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3155 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3156 } 3157 3158 if (pcbddc->dbg_flag) { 3159 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);CHKERRQ(ierr); 3160 } 3161 3162 /* max size of subsets */ 3163 mss = 0; 3164 for (i=0;i<sub_schurs->n_subs;i++) { 3165 PetscInt subset_size; 3166 3167 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3168 mss = PetscMax(mss,subset_size); 3169 } 3170 3171 /* min/max and threshold */ 3172 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3173 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3174 nmax = PetscMax(nmin,nmax); 3175 allocated_S_St = PETSC_FALSE; 3176 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3177 allocated_S_St = PETSC_TRUE; 3178 } 3179 3180 /* allocate lapack workspace */ 3181 cum = cum2 = 0; 3182 maxneigs = 0; 3183 for (i=0;i<sub_schurs->n_subs;i++) { 3184 PetscInt n,subset_size; 3185 3186 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3187 n = PetscMin(subset_size,nmax); 3188 cum += subset_size; 3189 cum2 += subset_size*n; 3190 maxneigs = PetscMax(maxneigs,n); 3191 } 3192 lwork = 0; 3193 if (mss) { 3194 if (sub_schurs->is_symmetric) { 3195 PetscScalar sdummy = 0.; 3196 PetscBLASInt B_itype = 1; 3197 PetscBLASInt B_N = mss, idummy = 0; 3198 PetscReal rdummy = 0.,zero = 0.0; 3199 PetscReal eps = 0.0; /* dlamch? */ 3200 3201 B_lwork = -1; 3202 /* some implementations may complain about NULL pointers, even if we are querying */ 3203 S = &sdummy; 3204 St = &sdummy; 3205 eigs = &rdummy; 3206 eigv = &sdummy; 3207 B_iwork = &idummy; 3208 B_ifail = &idummy; 3209 #if defined(PETSC_USE_COMPLEX) 3210 rwork = &rdummy; 3211 #endif 3212 thresh = 1.0; 3213 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3214 #if defined(PETSC_USE_COMPLEX) 3215 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3216 #else 3217 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3218 #endif 3219 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3220 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3221 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3222 } 3223 3224 nv = 0; 3225 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3226 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3227 } 3228 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3229 if (allocated_S_St) { 3230 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3231 } 3232 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3233 #if defined(PETSC_USE_COMPLEX) 3234 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3235 #endif 3236 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3237 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3238 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3239 nv+cum,&pcbddc->adaptive_constraints_idxs, 3240 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3241 ierr = PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);CHKERRQ(ierr); 3242 3243 maxneigs = 0; 3244 cum = cumarray = 0; 3245 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3246 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3247 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3248 const PetscInt *idxs; 3249 3250 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3251 for (cum=0;cum<nv;cum++) { 3252 pcbddc->adaptive_constraints_n[cum] = 1; 3253 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3254 pcbddc->adaptive_constraints_data[cum] = 1.0; 3255 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3256 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3257 } 3258 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3259 } 3260 3261 if (mss) { /* multilevel */ 3262 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3263 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3264 } 3265 3266 lthresh = pcbddc->adaptive_threshold[0]; 3267 uthresh = pcbddc->adaptive_threshold[1]; 3268 for (i=0;i<sub_schurs->n_subs;i++) { 3269 const PetscInt *idxs; 3270 PetscReal upper,lower; 3271 PetscInt j,subset_size,eigs_start = 0; 3272 PetscBLASInt B_N; 3273 PetscBool same_data = PETSC_FALSE; 3274 PetscBool scal = PETSC_FALSE; 3275 3276 if (pcbddc->use_deluxe_scaling) { 3277 upper = PETSC_MAX_REAL; 3278 lower = uthresh; 3279 } else { 3280 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3281 upper = 1./uthresh; 3282 lower = 0.; 3283 } 3284 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3285 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3286 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3287 /* this is experimental: we assume the dofs have been properly grouped to have 3288 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3289 if (!sub_schurs->is_posdef) { 3290 Mat T; 3291 3292 for (j=0;j<subset_size;j++) { 3293 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3294 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3295 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3296 ierr = MatDestroy(&T);CHKERRQ(ierr); 3297 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3298 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3299 ierr = MatDestroy(&T);CHKERRQ(ierr); 3300 if (sub_schurs->change_primal_sub) { 3301 PetscInt nz,k; 3302 const PetscInt *idxs; 3303 3304 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3305 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3306 for (k=0;k<nz;k++) { 3307 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3308 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3309 } 3310 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3311 } 3312 scal = PETSC_TRUE; 3313 break; 3314 } 3315 } 3316 } 3317 3318 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3319 if (sub_schurs->is_symmetric) { 3320 PetscInt j,k; 3321 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3322 ierr = PetscArrayzero(S,subset_size*subset_size);CHKERRQ(ierr); 3323 ierr = PetscArrayzero(St,subset_size*subset_size);CHKERRQ(ierr); 3324 } 3325 for (j=0;j<subset_size;j++) { 3326 for (k=j;k<subset_size;k++) { 3327 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3328 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3329 } 3330 } 3331 } else { 3332 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3333 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3334 } 3335 } else { 3336 S = Sarray + cumarray; 3337 St = Starray + cumarray; 3338 } 3339 /* see if we can save some work */ 3340 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3341 ierr = PetscArraycmp(S,St,subset_size*subset_size,&same_data);CHKERRQ(ierr); 3342 } 3343 3344 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3345 B_neigs = 0; 3346 } else { 3347 if (sub_schurs->is_symmetric) { 3348 PetscBLASInt B_itype = 1; 3349 PetscBLASInt B_IL, B_IU; 3350 PetscReal eps = -1.0; /* dlamch? */ 3351 PetscInt nmin_s; 3352 PetscBool compute_range; 3353 3354 B_neigs = 0; 3355 compute_range = (PetscBool)!same_data; 3356 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3357 3358 if (pcbddc->dbg_flag) { 3359 PetscInt nc = 0; 3360 3361 if (sub_schurs->change_primal_sub) { 3362 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3363 } 3364 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3365 } 3366 3367 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3368 if (compute_range) { 3369 3370 /* ask for eigenvalues larger than thresh */ 3371 if (sub_schurs->is_posdef) { 3372 #if defined(PETSC_USE_COMPLEX) 3373 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3374 #else 3375 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3376 #endif 3377 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3378 } else { /* no theory so far, but it works nicely */ 3379 PetscInt recipe = 0,recipe_m = 1; 3380 PetscReal bb[2]; 3381 3382 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3383 switch (recipe) { 3384 case 0: 3385 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3386 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3387 #if defined(PETSC_USE_COMPLEX) 3388 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3389 #else 3390 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3391 #endif 3392 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3393 break; 3394 case 1: 3395 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3396 #if defined(PETSC_USE_COMPLEX) 3397 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3398 #else 3399 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3400 #endif 3401 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3402 if (!scal) { 3403 PetscBLASInt B_neigs2 = 0; 3404 3405 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3406 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3407 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3408 #if defined(PETSC_USE_COMPLEX) 3409 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3410 #else 3411 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3412 #endif 3413 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3414 B_neigs += B_neigs2; 3415 } 3416 break; 3417 case 2: 3418 if (scal) { 3419 bb[0] = PETSC_MIN_REAL; 3420 bb[1] = 0; 3421 #if defined(PETSC_USE_COMPLEX) 3422 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3423 #else 3424 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3425 #endif 3426 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3427 } else { 3428 PetscBLASInt B_neigs2 = 0; 3429 PetscBool import = PETSC_FALSE; 3430 3431 lthresh = PetscMax(lthresh,0.0); 3432 if (lthresh > 0.0) { 3433 bb[0] = PETSC_MIN_REAL; 3434 bb[1] = lthresh*lthresh; 3435 3436 import = PETSC_TRUE; 3437 #if defined(PETSC_USE_COMPLEX) 3438 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3439 #else 3440 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3441 #endif 3442 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3443 } 3444 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3445 bb[1] = PETSC_MAX_REAL; 3446 if (import) { 3447 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3448 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3449 } 3450 #if defined(PETSC_USE_COMPLEX) 3451 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3452 #else 3453 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3454 #endif 3455 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3456 B_neigs += B_neigs2; 3457 } 3458 break; 3459 case 3: 3460 if (scal) { 3461 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3462 } else { 3463 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3464 } 3465 if (!scal) { 3466 bb[0] = uthresh; 3467 bb[1] = PETSC_MAX_REAL; 3468 #if defined(PETSC_USE_COMPLEX) 3469 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3470 #else 3471 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3472 #endif 3473 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3474 } 3475 if (recipe_m > 0 && B_N - B_neigs > 0) { 3476 PetscBLASInt B_neigs2 = 0; 3477 3478 B_IL = 1; 3479 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3480 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3481 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3482 #if defined(PETSC_USE_COMPLEX) 3483 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3484 #else 3485 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3486 #endif 3487 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3488 B_neigs += B_neigs2; 3489 } 3490 break; 3491 case 4: 3492 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3493 #if defined(PETSC_USE_COMPLEX) 3494 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3495 #else 3496 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3497 #endif 3498 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3499 { 3500 PetscBLASInt B_neigs2 = 0; 3501 3502 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3503 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3504 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3505 #if defined(PETSC_USE_COMPLEX) 3506 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3507 #else 3508 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3509 #endif 3510 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3511 B_neigs += B_neigs2; 3512 } 3513 break; 3514 case 5: /* same as before: first compute all eigenvalues, then filter */ 3515 #if defined(PETSC_USE_COMPLEX) 3516 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3517 #else 3518 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3519 #endif 3520 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3521 { 3522 PetscInt e,k,ne; 3523 for (e=0,ne=0;e<B_neigs;e++) { 3524 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3525 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3526 eigs[ne] = eigs[e]; 3527 ne++; 3528 } 3529 } 3530 ierr = PetscArraycpy(eigv,S,B_N*ne);CHKERRQ(ierr); 3531 B_neigs = ne; 3532 } 3533 break; 3534 default: 3535 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3536 } 3537 } 3538 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3539 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3540 B_IL = 1; 3541 #if defined(PETSC_USE_COMPLEX) 3542 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3543 #else 3544 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3545 #endif 3546 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3547 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3548 PetscInt k; 3549 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3550 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3551 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3552 nmin = nmax; 3553 ierr = PetscArrayzero(eigv,subset_size*nmax);CHKERRQ(ierr); 3554 for (k=0;k<nmax;k++) { 3555 eigs[k] = 1./PETSC_SMALL; 3556 eigv[k*(subset_size+1)] = 1.0; 3557 } 3558 } 3559 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3560 if (B_ierr) { 3561 if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3562 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3563 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3564 } 3565 3566 if (B_neigs > nmax) { 3567 if (pcbddc->dbg_flag) { 3568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);CHKERRQ(ierr); 3569 } 3570 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3571 B_neigs = nmax; 3572 } 3573 3574 nmin_s = PetscMin(nmin,B_N); 3575 if (B_neigs < nmin_s) { 3576 PetscBLASInt B_neigs2 = 0; 3577 3578 if (pcbddc->use_deluxe_scaling) { 3579 if (scal) { 3580 B_IU = nmin_s; 3581 B_IL = B_neigs + 1; 3582 } else { 3583 B_IL = B_N - nmin_s + 1; 3584 B_IU = B_N - B_neigs; 3585 } 3586 } else { 3587 B_IL = B_neigs + 1; 3588 B_IU = nmin_s; 3589 } 3590 if (pcbddc->dbg_flag) { 3591 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);CHKERRQ(ierr); 3592 } 3593 if (sub_schurs->is_symmetric) { 3594 PetscInt j,k; 3595 for (j=0;j<subset_size;j++) { 3596 for (k=j;k<subset_size;k++) { 3597 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3598 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3599 } 3600 } 3601 } else { 3602 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3603 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3604 } 3605 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3606 #if defined(PETSC_USE_COMPLEX) 3607 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3608 #else 3609 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3610 #endif 3611 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3612 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3613 B_neigs += B_neigs2; 3614 } 3615 if (B_ierr) { 3616 if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3617 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3618 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3619 } 3620 if (pcbddc->dbg_flag) { 3621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3622 for (j=0;j<B_neigs;j++) { 3623 if (eigs[j] == 0.0) { 3624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3625 } else { 3626 if (pcbddc->use_deluxe_scaling) { 3627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3628 } else { 3629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3630 } 3631 } 3632 } 3633 } 3634 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3635 } 3636 /* change the basis back to the original one */ 3637 if (sub_schurs->change) { 3638 Mat change,phi,phit; 3639 3640 if (pcbddc->dbg_flag > 2) { 3641 PetscInt ii; 3642 for (ii=0;ii<B_neigs;ii++) { 3643 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3644 for (j=0;j<B_N;j++) { 3645 #if defined(PETSC_USE_COMPLEX) 3646 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3647 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3649 #else 3650 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3651 #endif 3652 } 3653 } 3654 } 3655 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3656 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3657 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3658 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3659 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3660 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3661 } 3662 maxneigs = PetscMax(B_neigs,maxneigs); 3663 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3664 if (B_neigs) { 3665 ierr = PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);CHKERRQ(ierr); 3666 3667 if (pcbddc->dbg_flag > 1) { 3668 PetscInt ii; 3669 for (ii=0;ii<B_neigs;ii++) { 3670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3671 for (j=0;j<B_N;j++) { 3672 #if defined(PETSC_USE_COMPLEX) 3673 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3674 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3675 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3676 #else 3677 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3678 #endif 3679 } 3680 } 3681 } 3682 ierr = PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);CHKERRQ(ierr); 3683 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3684 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3685 cum++; 3686 } 3687 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3688 /* shift for next computation */ 3689 cumarray += subset_size*subset_size; 3690 } 3691 if (pcbddc->dbg_flag) { 3692 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3693 } 3694 3695 if (mss) { 3696 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3697 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3698 /* destroy matrices (junk) */ 3699 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3700 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3701 } 3702 if (allocated_S_St) { 3703 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3704 } 3705 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3706 #if defined(PETSC_USE_COMPLEX) 3707 ierr = PetscFree(rwork);CHKERRQ(ierr); 3708 #endif 3709 if (pcbddc->dbg_flag) { 3710 PetscInt maxneigs_r; 3711 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 3712 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);CHKERRQ(ierr); 3713 } 3714 ierr = PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3715 PetscFunctionReturn(0); 3716 } 3717 3718 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3719 { 3720 PetscScalar *coarse_submat_vals; 3721 PetscErrorCode ierr; 3722 3723 PetscFunctionBegin; 3724 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3725 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3726 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3727 3728 /* Setup local neumann solver ksp_R */ 3729 /* PCBDDCSetUpLocalScatters should be called first! */ 3730 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3731 3732 /* 3733 Setup local correction and local part of coarse basis. 3734 Gives back the dense local part of the coarse matrix in column major ordering 3735 */ 3736 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3737 3738 /* Compute total number of coarse nodes and setup coarse solver */ 3739 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3740 3741 /* free */ 3742 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3743 PetscFunctionReturn(0); 3744 } 3745 3746 PetscErrorCode PCBDDCResetCustomization(PC pc) 3747 { 3748 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3749 PetscErrorCode ierr; 3750 3751 PetscFunctionBegin; 3752 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3753 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3754 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3755 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3756 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3757 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3758 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3759 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3760 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3761 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3762 PetscFunctionReturn(0); 3763 } 3764 3765 PetscErrorCode PCBDDCResetTopography(PC pc) 3766 { 3767 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3768 PetscInt i; 3769 PetscErrorCode ierr; 3770 3771 PetscFunctionBegin; 3772 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3773 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3774 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3775 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3776 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3777 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3778 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3779 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3780 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3781 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3782 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3783 for (i=0;i<pcbddc->n_local_subs;i++) { 3784 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3785 } 3786 pcbddc->n_local_subs = 0; 3787 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3788 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3789 pcbddc->graphanalyzed = PETSC_FALSE; 3790 pcbddc->recompute_topography = PETSC_TRUE; 3791 pcbddc->corner_selected = PETSC_FALSE; 3792 PetscFunctionReturn(0); 3793 } 3794 3795 PetscErrorCode PCBDDCResetSolvers(PC pc) 3796 { 3797 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3798 PetscErrorCode ierr; 3799 3800 PetscFunctionBegin; 3801 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3802 if (pcbddc->coarse_phi_B) { 3803 PetscScalar *array; 3804 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3805 ierr = PetscFree(array);CHKERRQ(ierr); 3806 } 3807 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3808 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3809 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3810 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3811 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3812 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3813 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3814 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3815 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3816 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3817 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3818 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3819 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3820 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3821 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3822 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3823 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3824 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3825 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3826 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3827 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3828 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3829 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3830 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3831 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3832 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3833 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3834 if (pcbddc->benign_zerodiag_subs) { 3835 PetscInt i; 3836 for (i=0;i<pcbddc->benign_n;i++) { 3837 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3838 } 3839 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3840 } 3841 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3842 PetscFunctionReturn(0); 3843 } 3844 3845 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3846 { 3847 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3848 PC_IS *pcis = (PC_IS*)pc->data; 3849 VecType impVecType; 3850 PetscInt n_constraints,n_R,old_size; 3851 PetscErrorCode ierr; 3852 3853 PetscFunctionBegin; 3854 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3855 n_R = pcis->n - pcbddc->n_vertices; 3856 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3857 /* local work vectors (try to avoid unneeded work)*/ 3858 /* R nodes */ 3859 old_size = -1; 3860 if (pcbddc->vec1_R) { 3861 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3862 } 3863 if (n_R != old_size) { 3864 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3865 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3866 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3867 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3868 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3869 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3870 } 3871 /* local primal dofs */ 3872 old_size = -1; 3873 if (pcbddc->vec1_P) { 3874 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3875 } 3876 if (pcbddc->local_primal_size != old_size) { 3877 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3878 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3879 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3880 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3881 } 3882 /* local explicit constraints */ 3883 old_size = -1; 3884 if (pcbddc->vec1_C) { 3885 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3886 } 3887 if (n_constraints && n_constraints != old_size) { 3888 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3889 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3890 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3891 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3892 } 3893 PetscFunctionReturn(0); 3894 } 3895 3896 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3897 { 3898 PetscErrorCode ierr; 3899 /* pointers to pcis and pcbddc */ 3900 PC_IS* pcis = (PC_IS*)pc->data; 3901 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3902 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3903 /* submatrices of local problem */ 3904 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3905 /* submatrices of local coarse problem */ 3906 Mat S_VV,S_CV,S_VC,S_CC; 3907 /* working matrices */ 3908 Mat C_CR; 3909 /* additional working stuff */ 3910 PC pc_R; 3911 Mat F,Brhs = NULL; 3912 Vec dummy_vec; 3913 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3914 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3915 PetscScalar *work; 3916 PetscInt *idx_V_B; 3917 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3918 PetscInt i,n_R,n_D,n_B; 3919 PetscScalar one=1.0,m_one=-1.0; 3920 3921 PetscFunctionBegin; 3922 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3923 ierr = PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3924 3925 /* Set Non-overlapping dimensions */ 3926 n_vertices = pcbddc->n_vertices; 3927 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3928 n_B = pcis->n_B; 3929 n_D = pcis->n - n_B; 3930 n_R = pcis->n - n_vertices; 3931 3932 /* vertices in boundary numbering */ 3933 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3934 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3935 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3936 3937 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3938 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3939 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3940 ierr = MatDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3942 ierr = MatDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3943 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3944 ierr = MatDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3945 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3946 ierr = MatDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3947 3948 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3949 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3950 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3951 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3952 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3953 lda_rhs = n_R; 3954 need_benign_correction = PETSC_FALSE; 3955 if (isLU || isCHOL) { 3956 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3957 } else if (sub_schurs && sub_schurs->reuse_solver) { 3958 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3959 MatFactorType type; 3960 3961 F = reuse_solver->F; 3962 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3963 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3964 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3965 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3966 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3967 } else F = NULL; 3968 3969 /* determine if we can use a sparse right-hand side */ 3970 sparserhs = PETSC_FALSE; 3971 if (F) { 3972 MatSolverType solver; 3973 3974 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3975 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3976 } 3977 3978 /* allocate workspace */ 3979 n = 0; 3980 if (n_constraints) { 3981 n += lda_rhs*n_constraints; 3982 } 3983 if (n_vertices) { 3984 n = PetscMax(2*lda_rhs*n_vertices,n); 3985 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3986 } 3987 if (!pcbddc->symmetric_primal) { 3988 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3989 } 3990 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3991 3992 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3993 dummy_vec = NULL; 3994 if (need_benign_correction && lda_rhs != n_R && F) { 3995 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);CHKERRQ(ierr); 3996 ierr = VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);CHKERRQ(ierr); 3997 ierr = VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);CHKERRQ(ierr); 3998 } 3999 4000 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 4001 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 4002 4003 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4004 if (n_constraints) { 4005 Mat M3,C_B; 4006 IS is_aux; 4007 4008 /* Extract constraints on R nodes: C_{CR} */ 4009 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 4010 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 4011 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4012 4013 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4014 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4015 if (!sparserhs) { 4016 ierr = PetscArrayzero(work,lda_rhs*n_constraints);CHKERRQ(ierr); 4017 for (i=0;i<n_constraints;i++) { 4018 const PetscScalar *row_cmat_values; 4019 const PetscInt *row_cmat_indices; 4020 PetscInt size_of_constraint,j; 4021 4022 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4023 for (j=0;j<size_of_constraint;j++) { 4024 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4025 } 4026 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4027 } 4028 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 4029 } else { 4030 Mat tC_CR; 4031 4032 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4033 if (lda_rhs != n_R) { 4034 PetscScalar *aa; 4035 PetscInt r,*ii,*jj; 4036 PetscBool done; 4037 4038 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4039 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4040 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 4041 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 4042 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4043 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4044 } else { 4045 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 4046 tC_CR = C_CR; 4047 } 4048 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 4049 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 4050 } 4051 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 4052 if (F) { 4053 if (need_benign_correction) { 4054 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4055 4056 /* rhs is already zero on interior dofs, no need to change the rhs */ 4057 ierr = PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);CHKERRQ(ierr); 4058 } 4059 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 4060 if (need_benign_correction) { 4061 PetscScalar *marr; 4062 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4063 4064 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4065 if (lda_rhs != n_R) { 4066 for (i=0;i<n_constraints;i++) { 4067 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4068 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4069 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4070 } 4071 } else { 4072 for (i=0;i<n_constraints;i++) { 4073 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4074 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4075 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4076 } 4077 } 4078 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4079 } 4080 } else { 4081 PetscScalar *marr; 4082 4083 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4084 for (i=0;i<n_constraints;i++) { 4085 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4086 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 4087 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4088 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4089 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4090 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4091 } 4092 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4093 } 4094 if (sparserhs) { 4095 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4096 } 4097 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4098 if (!pcbddc->switch_static) { 4099 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4100 for (i=0;i<n_constraints;i++) { 4101 Vec r, b; 4102 ierr = MatDenseGetColumnVecRead(local_auxmat2_R,i,&r);CHKERRQ(ierr); 4103 ierr = MatDenseGetColumnVec(pcbddc->local_auxmat2,i,&b);CHKERRQ(ierr); 4104 ierr = VecScatterBegin(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4105 ierr = VecScatterEnd(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4106 ierr = MatDenseRestoreColumnVec(pcbddc->local_auxmat2,i,&b);CHKERRQ(ierr); 4107 ierr = MatDenseRestoreColumnVecRead(local_auxmat2_R,i,&r);CHKERRQ(ierr); 4108 } 4109 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4110 } else { 4111 if (lda_rhs != n_R) { 4112 IS dummy; 4113 4114 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 4115 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4116 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 4117 } else { 4118 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 4119 pcbddc->local_auxmat2 = local_auxmat2_R; 4120 } 4121 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4122 } 4123 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4124 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4125 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4126 if (isCHOL) { 4127 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4128 } else { 4129 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4130 } 4131 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4132 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4133 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4134 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4135 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4136 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4137 } 4138 4139 /* Get submatrices from subdomain matrix */ 4140 if (n_vertices) { 4141 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4142 PetscBool oldpin; 4143 #endif 4144 PetscBool isaij; 4145 IS is_aux; 4146 4147 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4148 IS tis; 4149 4150 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4151 ierr = ISSort(tis);CHKERRQ(ierr); 4152 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4153 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4154 } else { 4155 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4156 } 4157 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4158 oldpin = pcbddc->local_mat->boundtocpu; 4159 #endif 4160 ierr = MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);CHKERRQ(ierr); 4161 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4162 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4163 ierr = PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);CHKERRQ(ierr); 4164 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4165 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4166 } 4167 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4168 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4169 ierr = MatBindToCPU(pcbddc->local_mat,oldpin);CHKERRQ(ierr); 4170 #endif 4171 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4172 } 4173 4174 /* Matrix of coarse basis functions (local) */ 4175 if (pcbddc->coarse_phi_B) { 4176 PetscInt on_B,on_primal,on_D=n_D; 4177 if (pcbddc->coarse_phi_D) { 4178 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4179 } 4180 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4181 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4182 PetscScalar *marray; 4183 4184 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4185 ierr = PetscFree(marray);CHKERRQ(ierr); 4186 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4187 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4188 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4189 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4190 } 4191 } 4192 4193 if (!pcbddc->coarse_phi_B) { 4194 PetscScalar *marr; 4195 4196 /* memory size */ 4197 n = n_B*pcbddc->local_primal_size; 4198 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4199 if (!pcbddc->symmetric_primal) n *= 2; 4200 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4201 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4202 marr += n_B*pcbddc->local_primal_size; 4203 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4204 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4205 marr += n_D*pcbddc->local_primal_size; 4206 } 4207 if (!pcbddc->symmetric_primal) { 4208 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4209 marr += n_B*pcbddc->local_primal_size; 4210 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4211 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4212 } 4213 } else { 4214 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4215 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4216 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4217 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4218 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4219 } 4220 } 4221 } 4222 4223 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4224 p0_lidx_I = NULL; 4225 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4226 const PetscInt *idxs; 4227 4228 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4229 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4230 for (i=0;i<pcbddc->benign_n;i++) { 4231 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4232 } 4233 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4234 } 4235 4236 /* vertices */ 4237 if (n_vertices) { 4238 PetscBool restoreavr = PETSC_FALSE; 4239 4240 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4241 4242 if (n_R) { 4243 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4244 PetscBLASInt B_N,B_one = 1; 4245 const PetscScalar *x; 4246 PetscScalar *y; 4247 4248 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4249 if (need_benign_correction) { 4250 ISLocalToGlobalMapping RtoN; 4251 IS is_p0; 4252 PetscInt *idxs_p0,n; 4253 4254 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4255 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4256 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4257 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4258 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4259 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4260 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4261 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4262 } 4263 4264 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4265 if (!sparserhs || need_benign_correction) { 4266 if (lda_rhs == n_R) { 4267 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4268 } else { 4269 PetscScalar *av,*array; 4270 const PetscInt *xadj,*adjncy; 4271 PetscInt n; 4272 PetscBool flg_row; 4273 4274 array = work+lda_rhs*n_vertices; 4275 ierr = PetscArrayzero(array,lda_rhs*n_vertices);CHKERRQ(ierr); 4276 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4277 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4278 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4279 for (i=0;i<n;i++) { 4280 PetscInt j; 4281 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4282 } 4283 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4284 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4285 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4286 } 4287 if (need_benign_correction) { 4288 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4289 PetscScalar *marr; 4290 4291 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4292 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4293 4294 | 0 0 0 | (V) 4295 L = | 0 0 -1 | (P-p0) 4296 | 0 0 -1 | (p0) 4297 4298 */ 4299 for (i=0;i<reuse_solver->benign_n;i++) { 4300 const PetscScalar *vals; 4301 const PetscInt *idxs,*idxs_zero; 4302 PetscInt n,j,nz; 4303 4304 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4305 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4306 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4307 for (j=0;j<n;j++) { 4308 PetscScalar val = vals[j]; 4309 PetscInt k,col = idxs[j]; 4310 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4311 } 4312 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4313 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4314 } 4315 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4316 } 4317 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4318 Brhs = A_RV; 4319 } else { 4320 Mat tA_RVT,A_RVT; 4321 4322 if (!pcbddc->symmetric_primal) { 4323 /* A_RV already scaled by -1 */ 4324 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4325 } else { 4326 restoreavr = PETSC_TRUE; 4327 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4328 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4329 A_RVT = A_VR; 4330 } 4331 if (lda_rhs != n_R) { 4332 PetscScalar *aa; 4333 PetscInt r,*ii,*jj; 4334 PetscBool done; 4335 4336 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4337 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4338 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4339 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4340 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4341 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4342 } else { 4343 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4344 tA_RVT = A_RVT; 4345 } 4346 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4347 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4348 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4349 } 4350 if (F) { 4351 /* need to correct the rhs */ 4352 if (need_benign_correction) { 4353 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4354 PetscScalar *marr; 4355 4356 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4357 if (lda_rhs != n_R) { 4358 for (i=0;i<n_vertices;i++) { 4359 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4360 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4361 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4362 } 4363 } else { 4364 for (i=0;i<n_vertices;i++) { 4365 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4366 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4367 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4368 } 4369 } 4370 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4371 } 4372 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4373 if (restoreavr) { 4374 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4375 } 4376 /* need to correct the solution */ 4377 if (need_benign_correction) { 4378 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4379 PetscScalar *marr; 4380 4381 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4382 if (lda_rhs != n_R) { 4383 for (i=0;i<n_vertices;i++) { 4384 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4385 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4386 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4387 } 4388 } else { 4389 for (i=0;i<n_vertices;i++) { 4390 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4391 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4392 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4393 } 4394 } 4395 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4396 } 4397 } else { 4398 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4399 for (i=0;i<n_vertices;i++) { 4400 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4401 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4402 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4403 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4404 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4405 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4406 } 4407 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4408 } 4409 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4410 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4411 /* S_VV and S_CV */ 4412 if (n_constraints) { 4413 Mat B; 4414 4415 ierr = PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);CHKERRQ(ierr); 4416 for (i=0;i<n_vertices;i++) { 4417 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4418 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4419 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4420 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4421 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4422 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4423 } 4424 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4425 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4426 ierr = MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV);CHKERRQ(ierr); 4427 ierr = MatProductSetType(S_CV,MATPRODUCT_AB);CHKERRQ(ierr); 4428 ierr = MatProductSetFromOptions(S_CV);CHKERRQ(ierr); 4429 ierr = MatProductSymbolic(S_CV);CHKERRQ(ierr); 4430 ierr = MatProductNumeric(S_CV);CHKERRQ(ierr); 4431 ierr = MatProductClear(S_CV);CHKERRQ(ierr); 4432 4433 ierr = MatDestroy(&B);CHKERRQ(ierr); 4434 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4435 /* Reuse B = local_auxmat2_R * S_CV */ 4436 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B);CHKERRQ(ierr); 4437 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4438 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4439 ierr = MatProductSymbolic(B);CHKERRQ(ierr); 4440 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4441 4442 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4443 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4444 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4445 ierr = MatDestroy(&B);CHKERRQ(ierr); 4446 } 4447 if (lda_rhs != n_R) { 4448 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4449 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4450 ierr = MatDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4451 } 4452 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4453 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4454 if (need_benign_correction) { 4455 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4456 PetscScalar *marr,*sums; 4457 4458 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4459 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4460 for (i=0;i<reuse_solver->benign_n;i++) { 4461 const PetscScalar *vals; 4462 const PetscInt *idxs,*idxs_zero; 4463 PetscInt n,j,nz; 4464 4465 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4466 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4467 for (j=0;j<n_vertices;j++) { 4468 PetscInt k; 4469 sums[j] = 0.; 4470 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4471 } 4472 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4473 for (j=0;j<n;j++) { 4474 PetscScalar val = vals[j]; 4475 PetscInt k; 4476 for (k=0;k<n_vertices;k++) { 4477 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4478 } 4479 } 4480 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4481 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4482 } 4483 ierr = PetscFree(sums);CHKERRQ(ierr); 4484 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4485 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4486 } 4487 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4488 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4489 ierr = MatDenseGetArrayRead(A_VV,&x);CHKERRQ(ierr); 4490 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4491 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4492 ierr = MatDenseRestoreArrayRead(A_VV,&x);CHKERRQ(ierr); 4493 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4494 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4495 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4496 } else { 4497 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4498 } 4499 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4500 4501 /* coarse basis functions */ 4502 for (i=0;i<n_vertices;i++) { 4503 Vec v; 4504 PetscScalar one = 1.0,zero = 0.0; 4505 4506 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4507 ierr = MatDenseGetColumnVec(pcbddc->coarse_phi_B,i,&v);CHKERRQ(ierr); 4508 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4509 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4510 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4511 PetscMPIInt rank; 4512 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),&rank);CHKERRMPI(ierr); 4513 if (rank > 1) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4514 } 4515 ierr = VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES);CHKERRQ(ierr); 4516 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4517 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 4518 ierr = MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i,&v);CHKERRQ(ierr); 4519 4520 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4521 PetscInt j; 4522 4523 ierr = MatDenseGetColumnVec(pcbddc->coarse_phi_D,i,&v);CHKERRQ(ierr); 4524 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4525 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4526 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4527 PetscMPIInt rank; 4528 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),&rank);CHKERRMPI(ierr); 4529 if (rank > 1) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4530 } 4531 for (j=0;j<pcbddc->benign_n;j++) {ierr = VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES);CHKERRQ(ierr);} 4532 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 4533 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 4534 ierr = MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i,&v);CHKERRQ(ierr); 4535 } 4536 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4537 } 4538 /* if n_R == 0 the object is not destroyed */ 4539 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4540 } 4541 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4542 4543 if (n_constraints) { 4544 Mat B; 4545 4546 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4547 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4548 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B);CHKERRQ(ierr); 4549 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4550 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4551 ierr = MatProductSymbolic(B);CHKERRQ(ierr); 4552 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4553 4554 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4555 if (n_vertices) { 4556 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4557 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4558 } else { 4559 Mat S_VCt; 4560 4561 if (lda_rhs != n_R) { 4562 ierr = MatDestroy(&B);CHKERRQ(ierr); 4563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4564 ierr = MatDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4565 } 4566 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4567 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4568 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4569 } 4570 } 4571 ierr = MatDestroy(&B);CHKERRQ(ierr); 4572 /* coarse basis functions */ 4573 for (i=0;i<n_constraints;i++) { 4574 Vec v; 4575 4576 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4577 ierr = MatDenseGetColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v);CHKERRQ(ierr); 4578 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4579 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4580 ierr = MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v);CHKERRQ(ierr); 4581 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4582 PetscInt j; 4583 PetscScalar zero = 0.0; 4584 ierr = MatDenseGetColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v);CHKERRQ(ierr); 4585 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4586 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4587 for (j=0;j<pcbddc->benign_n;j++) {ierr = VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES);CHKERRQ(ierr);} 4588 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 4589 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 4590 ierr = MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v);CHKERRQ(ierr); 4591 } 4592 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4593 } 4594 } 4595 if (n_constraints) { 4596 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4597 } 4598 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4599 4600 /* coarse matrix entries relative to B_0 */ 4601 if (pcbddc->benign_n) { 4602 Mat B0_B,B0_BPHI; 4603 IS is_dummy; 4604 const PetscScalar *data; 4605 PetscInt j; 4606 4607 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4608 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4609 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4610 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4611 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4612 ierr = MatDenseGetArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4613 for (j=0;j<pcbddc->benign_n;j++) { 4614 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4615 for (i=0;i<pcbddc->local_primal_size;i++) { 4616 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4617 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4618 } 4619 } 4620 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4621 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4622 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4623 } 4624 4625 /* compute other basis functions for non-symmetric problems */ 4626 if (!pcbddc->symmetric_primal) { 4627 Mat B_V=NULL,B_C=NULL; 4628 PetscScalar *marray; 4629 4630 if (n_constraints) { 4631 Mat S_CCT,C_CRT; 4632 4633 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4634 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4635 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4636 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4637 if (n_vertices) { 4638 Mat S_VCT; 4639 4640 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4641 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4642 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4643 } 4644 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4645 } else { 4646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4647 } 4648 if (n_vertices && n_R) { 4649 PetscScalar *av,*marray; 4650 const PetscInt *xadj,*adjncy; 4651 PetscInt n; 4652 PetscBool flg_row; 4653 4654 /* B_V = B_V - A_VR^T */ 4655 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4656 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4657 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4658 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4659 for (i=0;i<n;i++) { 4660 PetscInt j; 4661 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4662 } 4663 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4664 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4665 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4666 } 4667 4668 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4669 if (n_vertices) { 4670 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4671 for (i=0;i<n_vertices;i++) { 4672 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4673 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4674 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4675 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4676 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4677 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4678 } 4679 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4680 } 4681 if (B_C) { 4682 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4683 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4684 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4685 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4686 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4687 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4688 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4689 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4690 } 4691 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4692 } 4693 /* coarse basis functions */ 4694 for (i=0;i<pcbddc->local_primal_size;i++) { 4695 Vec v; 4696 4697 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4698 ierr = MatDenseGetColumnVec(pcbddc->coarse_psi_B,i,&v);CHKERRQ(ierr); 4699 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4700 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4701 if (i<n_vertices) { 4702 PetscScalar one = 1.0; 4703 ierr = VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES);CHKERRQ(ierr); 4704 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 4705 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 4706 } 4707 ierr = MatDenseRestoreColumnVec(pcbddc->coarse_psi_B,i,&v);CHKERRQ(ierr); 4708 4709 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4710 ierr = MatDenseGetColumnVec(pcbddc->coarse_psi_D,i,&v);CHKERRQ(ierr); 4711 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4712 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4713 ierr = MatDenseRestoreColumnVec(pcbddc->coarse_psi_D,i,&v);CHKERRQ(ierr); 4714 } 4715 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4716 } 4717 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4718 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4719 } 4720 4721 /* free memory */ 4722 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4723 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4724 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4725 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4726 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4727 ierr = PetscFree(work);CHKERRQ(ierr); 4728 if (n_vertices) { 4729 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4730 } 4731 if (n_constraints) { 4732 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4733 } 4734 ierr = PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 4735 4736 /* Checking coarse_sub_mat and coarse basis functios */ 4737 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4738 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4739 if (pcbddc->dbg_flag) { 4740 Mat coarse_sub_mat; 4741 Mat AUXMAT,TM1,TM2,TM3,TM4; 4742 Mat coarse_phi_D,coarse_phi_B; 4743 Mat coarse_psi_D,coarse_psi_B; 4744 Mat A_II,A_BB,A_IB,A_BI; 4745 Mat C_B,CPHI; 4746 IS is_dummy; 4747 Vec mones; 4748 MatType checkmattype=MATSEQAIJ; 4749 PetscReal real_value; 4750 4751 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4752 Mat A; 4753 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4754 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4755 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4756 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4757 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4758 ierr = MatDestroy(&A);CHKERRQ(ierr); 4759 } else { 4760 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4761 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4762 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4763 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4764 } 4765 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4766 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4767 if (!pcbddc->symmetric_primal) { 4768 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4769 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4770 } 4771 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4772 4773 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4774 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4775 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4776 if (!pcbddc->symmetric_primal) { 4777 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4778 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4779 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4780 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4781 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4782 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4783 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4784 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4785 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4786 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4787 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4788 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4789 } else { 4790 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4791 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4792 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4793 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4794 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4795 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4796 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4797 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4798 } 4799 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4800 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4801 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4802 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4803 if (pcbddc->benign_n) { 4804 Mat B0_B,B0_BPHI; 4805 const PetscScalar *data2; 4806 PetscScalar *data; 4807 PetscInt j; 4808 4809 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4810 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4811 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4812 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4813 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4814 ierr = MatDenseGetArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4815 for (j=0;j<pcbddc->benign_n;j++) { 4816 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4817 for (i=0;i<pcbddc->local_primal_size;i++) { 4818 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4819 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4820 } 4821 } 4822 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4823 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4824 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4825 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4826 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4827 } 4828 #if 0 4829 { 4830 PetscViewer viewer; 4831 char filename[256]; 4832 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4833 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4834 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4835 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4836 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4837 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4838 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4839 if (pcbddc->coarse_phi_B) { 4840 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4841 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4842 } 4843 if (pcbddc->coarse_phi_D) { 4844 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4845 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4846 } 4847 if (pcbddc->coarse_psi_B) { 4848 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4849 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4850 } 4851 if (pcbddc->coarse_psi_D) { 4852 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4853 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4854 } 4855 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4856 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4857 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4858 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4859 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4860 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4861 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4862 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4863 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4864 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4865 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4866 } 4867 #endif 4868 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4869 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4870 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4871 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4872 4873 /* check constraints */ 4874 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4875 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4876 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4877 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4878 } else { 4879 PetscScalar *data; 4880 Mat tmat; 4881 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4882 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4883 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4884 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4885 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4886 } 4887 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4888 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4889 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4890 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4891 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4892 if (!pcbddc->symmetric_primal) { 4893 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4894 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4895 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4896 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4897 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4898 } 4899 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4900 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4901 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4902 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4903 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4904 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4905 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4906 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4907 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4908 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4909 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4910 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4911 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4912 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4913 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4914 if (!pcbddc->symmetric_primal) { 4915 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4916 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4917 } 4918 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4919 } 4920 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4921 { 4922 PetscBool gpu; 4923 4924 ierr = PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);CHKERRQ(ierr); 4925 if (gpu) { 4926 if (pcbddc->local_auxmat1) { 4927 ierr = MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4928 } 4929 if (pcbddc->local_auxmat2) { 4930 ierr = MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4931 } 4932 if (pcbddc->coarse_phi_B) { 4933 ierr = MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4934 } 4935 if (pcbddc->coarse_phi_D) { 4936 ierr = MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4937 } 4938 if (pcbddc->coarse_psi_B) { 4939 ierr = MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4940 } 4941 if (pcbddc->coarse_psi_D) { 4942 ierr = MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4943 } 4944 } 4945 } 4946 /* get back data */ 4947 *coarse_submat_vals_n = coarse_submat_vals; 4948 PetscFunctionReturn(0); 4949 } 4950 4951 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4952 { 4953 Mat *work_mat; 4954 IS isrow_s,iscol_s; 4955 PetscBool rsorted,csorted; 4956 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4957 PetscErrorCode ierr; 4958 4959 PetscFunctionBegin; 4960 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4961 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4962 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4963 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4964 4965 if (!rsorted) { 4966 const PetscInt *idxs; 4967 PetscInt *idxs_sorted,i; 4968 4969 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4970 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4971 for (i=0;i<rsize;i++) { 4972 idxs_perm_r[i] = i; 4973 } 4974 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4975 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4976 for (i=0;i<rsize;i++) { 4977 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4978 } 4979 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4980 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4981 } else { 4982 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4983 isrow_s = isrow; 4984 } 4985 4986 if (!csorted) { 4987 if (isrow == iscol) { 4988 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4989 iscol_s = isrow_s; 4990 } else { 4991 const PetscInt *idxs; 4992 PetscInt *idxs_sorted,i; 4993 4994 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4995 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4996 for (i=0;i<csize;i++) { 4997 idxs_perm_c[i] = i; 4998 } 4999 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 5000 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 5001 for (i=0;i<csize;i++) { 5002 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5003 } 5004 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 5005 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 5006 } 5007 } else { 5008 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 5009 iscol_s = iscol; 5010 } 5011 5012 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5013 5014 if (!rsorted || !csorted) { 5015 Mat new_mat; 5016 IS is_perm_r,is_perm_c; 5017 5018 if (!rsorted) { 5019 PetscInt *idxs_r,i; 5020 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 5021 for (i=0;i<rsize;i++) { 5022 idxs_r[idxs_perm_r[i]] = i; 5023 } 5024 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 5025 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 5026 } else { 5027 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 5028 } 5029 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 5030 5031 if (!csorted) { 5032 if (isrow_s == iscol_s) { 5033 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 5034 is_perm_c = is_perm_r; 5035 } else { 5036 PetscInt *idxs_c,i; 5037 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5038 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 5039 for (i=0;i<csize;i++) { 5040 idxs_c[idxs_perm_c[i]] = i; 5041 } 5042 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 5043 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 5044 } 5045 } else { 5046 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 5047 } 5048 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 5049 5050 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 5051 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 5052 work_mat[0] = new_mat; 5053 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 5054 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 5055 } 5056 5057 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 5058 *B = work_mat[0]; 5059 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 5060 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 5061 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 5062 PetscFunctionReturn(0); 5063 } 5064 5065 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5066 { 5067 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5068 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5069 Mat new_mat,lA; 5070 IS is_local,is_global; 5071 PetscInt local_size; 5072 PetscBool isseqaij; 5073 PetscErrorCode ierr; 5074 5075 PetscFunctionBegin; 5076 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5077 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 5078 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 5079 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 5080 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 5081 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 5082 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5083 5084 if (pcbddc->dbg_flag) { 5085 Vec x,x_change; 5086 PetscReal error; 5087 5088 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 5089 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5090 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 5091 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5092 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5093 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 5094 if (!pcbddc->change_interior) { 5095 const PetscScalar *x,*y,*v; 5096 PetscReal lerror = 0.; 5097 PetscInt i; 5098 5099 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 5100 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 5101 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 5102 for (i=0;i<local_size;i++) 5103 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5104 lerror = PetscAbsScalar(x[i]-y[i]); 5105 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 5106 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 5107 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 5108 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 5109 if (error > PETSC_SMALL) { 5110 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5111 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 5112 } else { 5113 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 5114 } 5115 } 5116 } 5117 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5118 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5119 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5120 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5121 if (error > PETSC_SMALL) { 5122 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5123 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 5124 } else { 5125 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 5126 } 5127 } 5128 ierr = VecDestroy(&x);CHKERRQ(ierr); 5129 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5130 } 5131 5132 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5133 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 5134 5135 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5136 ierr = PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5137 if (isseqaij) { 5138 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5139 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5140 if (lA) { 5141 Mat work; 5142 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5143 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5144 ierr = MatDestroy(&work);CHKERRQ(ierr); 5145 } 5146 } else { 5147 Mat work_mat; 5148 5149 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5150 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5151 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5152 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 5153 if (lA) { 5154 Mat work; 5155 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5156 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5157 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5158 ierr = MatDestroy(&work);CHKERRQ(ierr); 5159 } 5160 } 5161 if (matis->A->symmetric_set) { 5162 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 5163 #if !defined(PETSC_USE_COMPLEX) 5164 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 5165 #endif 5166 } 5167 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 5168 PetscFunctionReturn(0); 5169 } 5170 5171 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5172 { 5173 PC_IS* pcis = (PC_IS*)(pc->data); 5174 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5175 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5176 PetscInt *idx_R_local=NULL; 5177 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5178 PetscInt vbs,bs; 5179 PetscBT bitmask=NULL; 5180 PetscErrorCode ierr; 5181 5182 PetscFunctionBegin; 5183 /* 5184 No need to setup local scatters if 5185 - primal space is unchanged 5186 AND 5187 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5188 AND 5189 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5190 */ 5191 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5192 PetscFunctionReturn(0); 5193 } 5194 /* destroy old objects */ 5195 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5196 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5197 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5198 /* Set Non-overlapping dimensions */ 5199 n_B = pcis->n_B; 5200 n_D = pcis->n - n_B; 5201 n_vertices = pcbddc->n_vertices; 5202 5203 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5204 5205 /* create auxiliary bitmask and allocate workspace */ 5206 if (!sub_schurs || !sub_schurs->reuse_solver) { 5207 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5208 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5209 for (i=0;i<n_vertices;i++) { 5210 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5211 } 5212 5213 for (i=0, n_R=0; i<pcis->n; i++) { 5214 if (!PetscBTLookup(bitmask,i)) { 5215 idx_R_local[n_R++] = i; 5216 } 5217 } 5218 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5219 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5220 5221 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5222 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5223 } 5224 5225 /* Block code */ 5226 vbs = 1; 5227 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5228 if (bs>1 && !(n_vertices%bs)) { 5229 PetscBool is_blocked = PETSC_TRUE; 5230 PetscInt *vary; 5231 if (!sub_schurs || !sub_schurs->reuse_solver) { 5232 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5233 ierr = PetscArrayzero(vary,pcis->n/bs);CHKERRQ(ierr); 5234 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5235 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5236 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5237 for (i=0; i<pcis->n/bs; i++) { 5238 if (vary[i]!=0 && vary[i]!=bs) { 5239 is_blocked = PETSC_FALSE; 5240 break; 5241 } 5242 } 5243 ierr = PetscFree(vary);CHKERRQ(ierr); 5244 } else { 5245 /* Verify directly the R set */ 5246 for (i=0; i<n_R/bs; i++) { 5247 PetscInt j,node=idx_R_local[bs*i]; 5248 for (j=1; j<bs; j++) { 5249 if (node != idx_R_local[bs*i+j]-j) { 5250 is_blocked = PETSC_FALSE; 5251 break; 5252 } 5253 } 5254 } 5255 } 5256 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5257 vbs = bs; 5258 for (i=0;i<n_R/vbs;i++) { 5259 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5260 } 5261 } 5262 } 5263 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5264 if (sub_schurs && sub_schurs->reuse_solver) { 5265 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5266 5267 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5268 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5269 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5270 reuse_solver->is_R = pcbddc->is_R_local; 5271 } else { 5272 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5273 } 5274 5275 /* print some info if requested */ 5276 if (pcbddc->dbg_flag) { 5277 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5278 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5279 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5282 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5283 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5284 } 5285 5286 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5287 if (!sub_schurs || !sub_schurs->reuse_solver) { 5288 IS is_aux1,is_aux2; 5289 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5290 5291 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5292 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5293 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5294 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5295 for (i=0; i<n_D; i++) { 5296 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5297 } 5298 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5299 for (i=0, j=0; i<n_R; i++) { 5300 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5301 aux_array1[j++] = i; 5302 } 5303 } 5304 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5305 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5306 for (i=0, j=0; i<n_B; i++) { 5307 if (!PetscBTLookup(bitmask,is_indices[i])) { 5308 aux_array2[j++] = i; 5309 } 5310 } 5311 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5312 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5313 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5314 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5315 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5316 5317 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5318 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5319 for (i=0, j=0; i<n_R; i++) { 5320 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5321 aux_array1[j++] = i; 5322 } 5323 } 5324 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5325 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5326 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5327 } 5328 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5329 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5330 } else { 5331 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5332 IS tis; 5333 PetscInt schur_size; 5334 5335 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5336 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5337 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5338 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5339 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5340 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5341 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5342 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5343 } 5344 } 5345 PetscFunctionReturn(0); 5346 } 5347 5348 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5349 { 5350 MatNullSpace NullSpace; 5351 Mat dmat; 5352 const Vec *nullvecs; 5353 Vec v,v2,*nullvecs2; 5354 VecScatter sct = NULL; 5355 PetscContainer c; 5356 PetscScalar *ddata; 5357 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5358 PetscBool nnsp_has_cnst; 5359 PetscErrorCode ierr; 5360 5361 PetscFunctionBegin; 5362 if (!is && !B) { /* MATIS */ 5363 Mat_IS* matis = (Mat_IS*)A->data; 5364 5365 if (!B) { 5366 ierr = MatISGetLocalMat(A,&B);CHKERRQ(ierr); 5367 } 5368 sct = matis->cctx; 5369 ierr = PetscObjectReference((PetscObject)sct);CHKERRQ(ierr); 5370 } else { 5371 ierr = MatGetNullSpace(B,&NullSpace);CHKERRQ(ierr); 5372 if (!NullSpace) { 5373 ierr = MatGetNearNullSpace(B,&NullSpace);CHKERRQ(ierr); 5374 } 5375 if (NullSpace) PetscFunctionReturn(0); 5376 } 5377 ierr = MatGetNullSpace(A,&NullSpace);CHKERRQ(ierr); 5378 if (!NullSpace) { 5379 ierr = MatGetNearNullSpace(A,&NullSpace);CHKERRQ(ierr); 5380 } 5381 if (!NullSpace) PetscFunctionReturn(0); 5382 5383 ierr = MatCreateVecs(A,&v,NULL);CHKERRQ(ierr); 5384 ierr = MatCreateVecs(B,&v2,NULL);CHKERRQ(ierr); 5385 if (!sct) { 5386 ierr = VecScatterCreate(v,is,v2,NULL,&sct);CHKERRQ(ierr); 5387 } 5388 ierr = MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);CHKERRQ(ierr); 5389 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5390 ierr = PetscMalloc1(bsiz,&nullvecs2);CHKERRQ(ierr); 5391 ierr = VecGetBlockSize(v2,&bs);CHKERRQ(ierr); 5392 ierr = VecGetSize(v2,&N);CHKERRQ(ierr); 5393 ierr = VecGetLocalSize(v2,&n);CHKERRQ(ierr); 5394 ierr = PetscMalloc1(n*bsiz,&ddata);CHKERRQ(ierr); 5395 for (k=0;k<nnsp_size;k++) { 5396 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);CHKERRQ(ierr); 5397 ierr = VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5398 ierr = VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5399 } 5400 if (nnsp_has_cnst) { 5401 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);CHKERRQ(ierr); 5402 ierr = VecSet(nullvecs2[nnsp_size],1.0);CHKERRQ(ierr); 5403 } 5404 ierr = PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);CHKERRQ(ierr); 5405 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);CHKERRQ(ierr); 5406 5407 ierr = MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);CHKERRQ(ierr); 5408 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);CHKERRQ(ierr); 5409 ierr = PetscContainerSetPointer(c,ddata);CHKERRQ(ierr); 5410 ierr = PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);CHKERRQ(ierr); 5411 ierr = PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);CHKERRQ(ierr); 5412 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 5413 ierr = PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);CHKERRQ(ierr); 5414 ierr = MatDestroy(&dmat);CHKERRQ(ierr); 5415 5416 for (k=0;k<bsiz;k++) { 5417 ierr = VecDestroy(&nullvecs2[k]);CHKERRQ(ierr); 5418 } 5419 ierr = PetscFree(nullvecs2);CHKERRQ(ierr); 5420 ierr = MatSetNearNullSpace(B,NullSpace);CHKERRQ(ierr); 5421 ierr = MatNullSpaceDestroy(&NullSpace);CHKERRQ(ierr); 5422 ierr = VecDestroy(&v);CHKERRQ(ierr); 5423 ierr = VecDestroy(&v2);CHKERRQ(ierr); 5424 ierr = VecScatterDestroy(&sct);CHKERRQ(ierr); 5425 PetscFunctionReturn(0); 5426 } 5427 5428 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5429 { 5430 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5431 PC_IS *pcis = (PC_IS*)pc->data; 5432 PC pc_temp; 5433 Mat A_RR; 5434 MatNullSpace nnsp; 5435 MatReuse reuse; 5436 PetscScalar m_one = -1.0; 5437 PetscReal value; 5438 PetscInt n_D,n_R; 5439 PetscBool issbaij,opts; 5440 PetscErrorCode ierr; 5441 void (*f)(void) = NULL; 5442 char dir_prefix[256],neu_prefix[256],str_level[16]; 5443 size_t len; 5444 5445 PetscFunctionBegin; 5446 ierr = PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5447 /* approximate solver, propagate NearNullSpace if needed */ 5448 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5449 MatNullSpace gnnsp1,gnnsp2; 5450 PetscBool lhas,ghas; 5451 5452 ierr = MatGetNearNullSpace(pcbddc->local_mat,&nnsp);CHKERRQ(ierr); 5453 ierr = MatGetNearNullSpace(pc->pmat,&gnnsp1);CHKERRQ(ierr); 5454 ierr = MatGetNullSpace(pc->pmat,&gnnsp2);CHKERRQ(ierr); 5455 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5456 ierr = MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 5457 if (!ghas && (gnnsp1 || gnnsp2)) { 5458 ierr = MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);CHKERRQ(ierr); 5459 } 5460 } 5461 5462 /* compute prefixes */ 5463 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5464 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5465 if (!pcbddc->current_level) { 5466 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5467 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5468 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5469 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5470 } else { 5471 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5472 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5473 len -= 15; /* remove "pc_bddc_coarse_" */ 5474 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5475 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5476 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5477 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5478 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5479 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5480 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5481 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5482 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5483 } 5484 5485 /* DIRICHLET PROBLEM */ 5486 if (dirichlet) { 5487 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5488 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5489 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5490 if (pcbddc->dbg_flag) { 5491 Mat A_IIn; 5492 5493 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5494 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5495 pcis->A_II = A_IIn; 5496 } 5497 } 5498 if (pcbddc->local_mat->symmetric_set) { 5499 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5500 } 5501 /* Matrix for Dirichlet problem is pcis->A_II */ 5502 n_D = pcis->n - pcis->n_B; 5503 opts = PETSC_FALSE; 5504 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5505 opts = PETSC_TRUE; 5506 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5507 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5508 /* default */ 5509 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5510 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5511 ierr = PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5512 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5513 if (issbaij) { 5514 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5515 } else { 5516 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5517 } 5518 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr); 5519 } 5520 ierr = MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);CHKERRQ(ierr); 5521 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);CHKERRQ(ierr); 5522 /* Allow user's customization */ 5523 if (opts) { 5524 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5525 } 5526 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5527 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5528 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);CHKERRQ(ierr); 5529 } 5530 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5531 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5532 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5533 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5534 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5535 const PetscInt *idxs; 5536 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5537 5538 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5539 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5540 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5541 for (i=0;i<nl;i++) { 5542 for (d=0;d<cdim;d++) { 5543 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5544 } 5545 } 5546 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5547 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5548 ierr = PetscFree(scoords);CHKERRQ(ierr); 5549 } 5550 if (sub_schurs && sub_schurs->reuse_solver) { 5551 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5552 5553 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5554 } 5555 5556 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5557 if (!n_D) { 5558 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5559 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5560 } 5561 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5562 /* set ksp_D into pcis data */ 5563 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5564 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5565 pcis->ksp_D = pcbddc->ksp_D; 5566 } 5567 5568 /* NEUMANN PROBLEM */ 5569 A_RR = NULL; 5570 if (neumann) { 5571 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5572 PetscInt ibs,mbs; 5573 PetscBool issbaij, reuse_neumann_solver; 5574 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5575 5576 reuse_neumann_solver = PETSC_FALSE; 5577 if (sub_schurs && sub_schurs->reuse_solver) { 5578 IS iP; 5579 5580 reuse_neumann_solver = PETSC_TRUE; 5581 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5582 if (iP) reuse_neumann_solver = PETSC_FALSE; 5583 } 5584 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5585 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5586 if (pcbddc->ksp_R) { /* already created ksp */ 5587 PetscInt nn_R; 5588 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5589 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5590 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5591 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5592 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5593 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5594 reuse = MAT_INITIAL_MATRIX; 5595 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5596 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5597 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5598 reuse = MAT_INITIAL_MATRIX; 5599 } else { /* safe to reuse the matrix */ 5600 reuse = MAT_REUSE_MATRIX; 5601 } 5602 } 5603 /* last check */ 5604 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5605 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5606 reuse = MAT_INITIAL_MATRIX; 5607 } 5608 } else { /* first time, so we need to create the matrix */ 5609 reuse = MAT_INITIAL_MATRIX; 5610 } 5611 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5612 TODO: Get Rid of these conversions */ 5613 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5614 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5615 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5616 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5617 if (matis->A == pcbddc->local_mat) { 5618 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5619 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5620 } else { 5621 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5622 } 5623 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5624 if (matis->A == pcbddc->local_mat) { 5625 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5626 ierr = MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5627 } else { 5628 ierr = MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5629 } 5630 } 5631 /* extract A_RR */ 5632 if (reuse_neumann_solver) { 5633 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5634 5635 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5636 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5637 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5638 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5639 } else { 5640 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5641 } 5642 } else { 5643 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5644 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5645 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5646 } 5647 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5648 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5649 } 5650 if (pcbddc->local_mat->symmetric_set) { 5651 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5652 } 5653 opts = PETSC_FALSE; 5654 if (!pcbddc->ksp_R) { /* create object if not present */ 5655 opts = PETSC_TRUE; 5656 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5657 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5658 /* default */ 5659 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5660 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5661 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5662 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5663 if (issbaij) { 5664 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5665 } else { 5666 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5667 } 5668 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr); 5669 } 5670 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5671 ierr = MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);CHKERRQ(ierr); 5672 if (opts) { /* Allow user's customization once */ 5673 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5674 } 5675 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5676 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5677 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);CHKERRQ(ierr); 5678 } 5679 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5680 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5681 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5682 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5683 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5684 const PetscInt *idxs; 5685 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5686 5687 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5688 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5689 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5690 for (i=0;i<nl;i++) { 5691 for (d=0;d<cdim;d++) { 5692 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5693 } 5694 } 5695 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5696 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5697 ierr = PetscFree(scoords);CHKERRQ(ierr); 5698 } 5699 5700 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5701 if (!n_R) { 5702 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5703 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5704 } 5705 /* Reuse solver if it is present */ 5706 if (reuse_neumann_solver) { 5707 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5708 5709 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5710 } 5711 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5712 } 5713 5714 if (pcbddc->dbg_flag) { 5715 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5716 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5717 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5718 } 5719 ierr = PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5720 5721 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5722 if (pcbddc->NullSpace_corr[0]) { 5723 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5724 } 5725 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5726 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5727 } 5728 if (neumann && pcbddc->NullSpace_corr[2]) { 5729 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5730 } 5731 /* check Dirichlet and Neumann solvers */ 5732 if (pcbddc->dbg_flag) { 5733 if (dirichlet) { /* Dirichlet */ 5734 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5735 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5736 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5737 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 5738 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5739 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5740 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5741 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5742 } 5743 if (neumann) { /* Neumann */ 5744 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5745 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5746 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5747 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 5748 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5749 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5750 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5751 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5752 } 5753 } 5754 /* free Neumann problem's matrix */ 5755 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5756 PetscFunctionReturn(0); 5757 } 5758 5759 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5760 { 5761 PetscErrorCode ierr; 5762 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5763 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5764 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5765 5766 PetscFunctionBegin; 5767 if (!reuse_solver) { 5768 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5769 } 5770 if (!pcbddc->switch_static) { 5771 if (applytranspose && pcbddc->local_auxmat1) { 5772 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5773 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5774 } 5775 if (!reuse_solver) { 5776 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5777 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5778 } else { 5779 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5780 5781 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5782 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5783 } 5784 } else { 5785 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5786 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5787 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5788 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5789 if (applytranspose && pcbddc->local_auxmat1) { 5790 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5791 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5792 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5793 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5794 } 5795 } 5796 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);CHKERRQ(ierr); 5797 if (!reuse_solver || pcbddc->switch_static) { 5798 if (applytranspose) { 5799 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5800 } else { 5801 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5802 } 5803 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);CHKERRQ(ierr); 5804 } else { 5805 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5806 5807 if (applytranspose) { 5808 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5809 } else { 5810 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5811 } 5812 } 5813 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);CHKERRQ(ierr); 5814 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5815 if (!pcbddc->switch_static) { 5816 if (!reuse_solver) { 5817 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5818 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5819 } else { 5820 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5821 5822 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5823 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5824 } 5825 if (!applytranspose && pcbddc->local_auxmat1) { 5826 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5827 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5828 } 5829 } else { 5830 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5831 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5832 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5833 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5834 if (!applytranspose && pcbddc->local_auxmat1) { 5835 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5836 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5837 } 5838 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5839 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5840 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5841 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5842 } 5843 PetscFunctionReturn(0); 5844 } 5845 5846 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5847 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5848 { 5849 PetscErrorCode ierr; 5850 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5851 PC_IS* pcis = (PC_IS*) (pc->data); 5852 const PetscScalar zero = 0.0; 5853 5854 PetscFunctionBegin; 5855 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5856 if (!pcbddc->benign_apply_coarse_only) { 5857 if (applytranspose) { 5858 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5859 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5860 } else { 5861 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5862 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5863 } 5864 } else { 5865 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5866 } 5867 5868 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5869 if (pcbddc->benign_n) { 5870 PetscScalar *array; 5871 PetscInt j; 5872 5873 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5874 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5875 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5876 } 5877 5878 /* start communications from local primal nodes to rhs of coarse solver */ 5879 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5880 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5881 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5882 5883 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5884 if (pcbddc->coarse_ksp) { 5885 Mat coarse_mat; 5886 Vec rhs,sol; 5887 MatNullSpace nullsp; 5888 PetscBool isbddc = PETSC_FALSE; 5889 5890 if (pcbddc->benign_have_null) { 5891 PC coarse_pc; 5892 5893 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5894 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5895 /* we need to propagate to coarser levels the need for a possible benign correction */ 5896 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5897 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5898 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5899 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5900 } 5901 } 5902 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5903 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5904 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5905 if (applytranspose) { 5906 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5907 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5908 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5909 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5910 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5911 ierr = MatGetTransposeNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5912 if (nullsp) { 5913 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5914 } 5915 } else { 5916 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5917 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5918 PC coarse_pc; 5919 5920 if (nullsp) { 5921 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5922 } 5923 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5924 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5925 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5926 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5927 } else { 5928 ierr = PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5929 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5930 ierr = PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);CHKERRQ(ierr); 5931 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5932 if (nullsp) { 5933 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5934 } 5935 } 5936 } 5937 /* we don't need the benign correction at coarser levels anymore */ 5938 if (pcbddc->benign_have_null && isbddc) { 5939 PC coarse_pc; 5940 PC_BDDC* coarsepcbddc; 5941 5942 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5943 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5944 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5945 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5946 } 5947 } 5948 5949 /* Local solution on R nodes */ 5950 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5951 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5952 } 5953 /* communications from coarse sol to local primal nodes */ 5954 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5955 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5956 5957 /* Sum contributions from the two levels */ 5958 if (!pcbddc->benign_apply_coarse_only) { 5959 if (applytranspose) { 5960 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5961 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5962 } else { 5963 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5964 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5965 } 5966 /* store p0 */ 5967 if (pcbddc->benign_n) { 5968 PetscScalar *array; 5969 PetscInt j; 5970 5971 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5972 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5973 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5974 } 5975 } else { /* expand the coarse solution */ 5976 if (applytranspose) { 5977 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5978 } else { 5979 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5980 } 5981 } 5982 PetscFunctionReturn(0); 5983 } 5984 5985 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5986 { 5987 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5988 Vec from,to; 5989 const PetscScalar *array; 5990 PetscErrorCode ierr; 5991 5992 PetscFunctionBegin; 5993 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5994 from = pcbddc->coarse_vec; 5995 to = pcbddc->vec1_P; 5996 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5997 Vec tvec; 5998 5999 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 6000 ierr = VecResetArray(tvec);CHKERRQ(ierr); 6001 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 6002 ierr = VecGetArrayRead(tvec,&array);CHKERRQ(ierr); 6003 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 6004 ierr = VecRestoreArrayRead(tvec,&array);CHKERRQ(ierr); 6005 } 6006 } else { /* from local to global -> put data in coarse right hand side */ 6007 from = pcbddc->vec1_P; 6008 to = pcbddc->coarse_vec; 6009 } 6010 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 6011 PetscFunctionReturn(0); 6012 } 6013 6014 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6015 { 6016 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 6017 Vec from,to; 6018 const PetscScalar *array; 6019 PetscErrorCode ierr; 6020 6021 PetscFunctionBegin; 6022 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6023 from = pcbddc->coarse_vec; 6024 to = pcbddc->vec1_P; 6025 } else { /* from local to global -> put data in coarse right hand side */ 6026 from = pcbddc->vec1_P; 6027 to = pcbddc->coarse_vec; 6028 } 6029 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 6030 if (smode == SCATTER_FORWARD) { 6031 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6032 Vec tvec; 6033 6034 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 6035 ierr = VecGetArrayRead(to,&array);CHKERRQ(ierr); 6036 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 6037 ierr = VecRestoreArrayRead(to,&array);CHKERRQ(ierr); 6038 } 6039 } else { 6040 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6041 ierr = VecResetArray(from);CHKERRQ(ierr); 6042 } 6043 } 6044 PetscFunctionReturn(0); 6045 } 6046 6047 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6048 { 6049 PetscErrorCode ierr; 6050 PC_IS* pcis = (PC_IS*)(pc->data); 6051 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6052 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6053 /* one and zero */ 6054 PetscScalar one=1.0,zero=0.0; 6055 /* space to store constraints and their local indices */ 6056 PetscScalar *constraints_data; 6057 PetscInt *constraints_idxs,*constraints_idxs_B; 6058 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6059 PetscInt *constraints_n; 6060 /* iterators */ 6061 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6062 /* BLAS integers */ 6063 PetscBLASInt lwork,lierr; 6064 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6065 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6066 /* reuse */ 6067 PetscInt olocal_primal_size,olocal_primal_size_cc; 6068 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6069 /* change of basis */ 6070 PetscBool qr_needed; 6071 PetscBT change_basis,qr_needed_idx; 6072 /* auxiliary stuff */ 6073 PetscInt *nnz,*is_indices; 6074 PetscInt ncc; 6075 /* some quantities */ 6076 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6077 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6078 PetscReal tol; /* tolerance for retaining eigenmodes */ 6079 6080 PetscFunctionBegin; 6081 tol = PetscSqrtReal(PETSC_SMALL); 6082 /* Destroy Mat objects computed previously */ 6083 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6084 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6085 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 6086 /* save info on constraints from previous setup (if any) */ 6087 olocal_primal_size = pcbddc->local_primal_size; 6088 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6089 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 6090 ierr = PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);CHKERRQ(ierr); 6091 ierr = PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);CHKERRQ(ierr); 6092 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6093 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6094 6095 if (!pcbddc->adaptive_selection) { 6096 IS ISForVertices,*ISForFaces,*ISForEdges; 6097 MatNullSpace nearnullsp; 6098 const Vec *nearnullvecs; 6099 Vec *localnearnullsp; 6100 PetscScalar *array; 6101 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 6102 PetscBool nnsp_has_cnst; 6103 /* LAPACK working arrays for SVD or POD */ 6104 PetscBool skip_lapack,boolforchange; 6105 PetscScalar *work; 6106 PetscReal *singular_vals; 6107 #if defined(PETSC_USE_COMPLEX) 6108 PetscReal *rwork; 6109 #endif 6110 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6111 PetscBLASInt dummy_int=1; 6112 PetscScalar dummy_scalar=1.; 6113 PetscBool use_pod = PETSC_FALSE; 6114 6115 /* MKL SVD with same input gives different results on different processes! */ 6116 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL) 6117 use_pod = PETSC_TRUE; 6118 #endif 6119 /* Get index sets for faces, edges and vertices from graph */ 6120 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 6121 /* print some info */ 6122 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6123 PetscInt nv; 6124 6125 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6126 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 6127 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6128 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6129 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6130 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 6131 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 6132 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6133 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6134 } 6135 6136 /* free unneeded index sets */ 6137 if (!pcbddc->use_vertices) { 6138 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6139 } 6140 if (!pcbddc->use_edges) { 6141 for (i=0;i<n_ISForEdges;i++) { 6142 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6143 } 6144 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6145 n_ISForEdges = 0; 6146 } 6147 if (!pcbddc->use_faces) { 6148 for (i=0;i<n_ISForFaces;i++) { 6149 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6150 } 6151 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6152 n_ISForFaces = 0; 6153 } 6154 6155 /* check if near null space is attached to global mat */ 6156 if (pcbddc->use_nnsp) { 6157 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 6158 } else nearnullsp = NULL; 6159 6160 if (nearnullsp) { 6161 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 6162 /* remove any stored info */ 6163 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 6164 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6165 /* store information for BDDC solver reuse */ 6166 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 6167 pcbddc->onearnullspace = nearnullsp; 6168 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6169 for (i=0;i<nnsp_size;i++) { 6170 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 6171 } 6172 } else { /* if near null space is not provided BDDC uses constants by default */ 6173 nnsp_size = 0; 6174 nnsp_has_cnst = PETSC_TRUE; 6175 } 6176 /* get max number of constraints on a single cc */ 6177 max_constraints = nnsp_size; 6178 if (nnsp_has_cnst) max_constraints++; 6179 6180 /* 6181 Evaluate maximum storage size needed by the procedure 6182 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6183 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6184 There can be multiple constraints per connected component 6185 */ 6186 n_vertices = 0; 6187 if (ISForVertices) { 6188 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 6189 } 6190 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6191 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 6192 6193 total_counts = n_ISForFaces+n_ISForEdges; 6194 total_counts *= max_constraints; 6195 total_counts += n_vertices; 6196 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 6197 6198 total_counts = 0; 6199 max_size_of_constraint = 0; 6200 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6201 IS used_is; 6202 if (i<n_ISForEdges) { 6203 used_is = ISForEdges[i]; 6204 } else { 6205 used_is = ISForFaces[i-n_ISForEdges]; 6206 } 6207 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 6208 total_counts += j; 6209 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6210 } 6211 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 6212 6213 /* get local part of global near null space vectors */ 6214 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 6215 for (k=0;k<nnsp_size;k++) { 6216 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 6217 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6218 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6219 } 6220 6221 /* whether or not to skip lapack calls */ 6222 skip_lapack = PETSC_TRUE; 6223 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6224 6225 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6226 if (!skip_lapack) { 6227 PetscScalar temp_work; 6228 6229 if (use_pod) { 6230 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6231 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 6232 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 6233 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 6234 #if defined(PETSC_USE_COMPLEX) 6235 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 6236 #endif 6237 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6238 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6239 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 6240 lwork = -1; 6241 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6242 #if !defined(PETSC_USE_COMPLEX) 6243 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6244 #else 6245 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6246 #endif 6247 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6248 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6249 } else { 6250 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6251 /* SVD */ 6252 PetscInt max_n,min_n; 6253 max_n = max_size_of_constraint; 6254 min_n = max_constraints; 6255 if (max_size_of_constraint < max_constraints) { 6256 min_n = max_size_of_constraint; 6257 max_n = max_constraints; 6258 } 6259 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 6260 #if defined(PETSC_USE_COMPLEX) 6261 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 6262 #endif 6263 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6264 lwork = -1; 6265 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 6266 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 6267 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 6268 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6269 #if !defined(PETSC_USE_COMPLEX) 6270 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6271 #else 6272 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6273 #endif 6274 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6275 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6276 #else 6277 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6278 #endif /* on missing GESVD */ 6279 } 6280 /* Allocate optimal workspace */ 6281 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 6282 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 6283 } 6284 /* Now we can loop on constraining sets */ 6285 total_counts = 0; 6286 constraints_idxs_ptr[0] = 0; 6287 constraints_data_ptr[0] = 0; 6288 /* vertices */ 6289 if (n_vertices) { 6290 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6291 ierr = PetscArraycpy(constraints_idxs,is_indices,n_vertices);CHKERRQ(ierr); 6292 for (i=0;i<n_vertices;i++) { 6293 constraints_n[total_counts] = 1; 6294 constraints_data[total_counts] = 1.0; 6295 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6296 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6297 total_counts++; 6298 } 6299 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6300 n_vertices = total_counts; 6301 } 6302 6303 /* edges and faces */ 6304 total_counts_cc = total_counts; 6305 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6306 IS used_is; 6307 PetscBool idxs_copied = PETSC_FALSE; 6308 6309 if (ncc<n_ISForEdges) { 6310 used_is = ISForEdges[ncc]; 6311 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6312 } else { 6313 used_is = ISForFaces[ncc-n_ISForEdges]; 6314 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6315 } 6316 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6317 6318 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6319 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6320 /* change of basis should not be performed on local periodic nodes */ 6321 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6322 if (nnsp_has_cnst) { 6323 PetscScalar quad_value; 6324 6325 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6326 idxs_copied = PETSC_TRUE; 6327 6328 if (!pcbddc->use_nnsp_true) { 6329 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6330 } else { 6331 quad_value = 1.0; 6332 } 6333 for (j=0;j<size_of_constraint;j++) { 6334 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6335 } 6336 temp_constraints++; 6337 total_counts++; 6338 } 6339 for (k=0;k<nnsp_size;k++) { 6340 PetscReal real_value; 6341 PetscScalar *ptr_to_data; 6342 6343 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6344 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6345 for (j=0;j<size_of_constraint;j++) { 6346 ptr_to_data[j] = array[is_indices[j]]; 6347 } 6348 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6349 /* check if array is null on the connected component */ 6350 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6351 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6352 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6353 temp_constraints++; 6354 total_counts++; 6355 if (!idxs_copied) { 6356 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6357 idxs_copied = PETSC_TRUE; 6358 } 6359 } 6360 } 6361 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6362 valid_constraints = temp_constraints; 6363 if (!pcbddc->use_nnsp_true && temp_constraints) { 6364 if (temp_constraints == 1) { /* just normalize the constraint */ 6365 PetscScalar norm,*ptr_to_data; 6366 6367 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6368 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6369 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6370 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6371 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6372 } else { /* perform SVD */ 6373 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6374 6375 if (use_pod) { 6376 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6377 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6378 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6379 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6380 from that computed using LAPACKgesvd 6381 -> This is due to a different computation of eigenvectors in LAPACKheev 6382 -> The quality of the POD-computed basis will be the same */ 6383 ierr = PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);CHKERRQ(ierr); 6384 /* Store upper triangular part of correlation matrix */ 6385 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6386 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6387 for (j=0;j<temp_constraints;j++) { 6388 for (k=0;k<j+1;k++) { 6389 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6390 } 6391 } 6392 /* compute eigenvalues and eigenvectors of correlation matrix */ 6393 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6394 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6395 #if !defined(PETSC_USE_COMPLEX) 6396 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6397 #else 6398 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6399 #endif 6400 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6401 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6402 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6403 j = 0; 6404 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6405 total_counts = total_counts-j; 6406 valid_constraints = temp_constraints-j; 6407 /* scale and copy POD basis into used quadrature memory */ 6408 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6409 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6410 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6411 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6412 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6413 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6414 if (j<temp_constraints) { 6415 PetscInt ii; 6416 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6417 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6418 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6419 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6420 for (k=0;k<temp_constraints-j;k++) { 6421 for (ii=0;ii<size_of_constraint;ii++) { 6422 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6423 } 6424 } 6425 } 6426 } else { 6427 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6428 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6429 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6430 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6431 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6432 #if !defined(PETSC_USE_COMPLEX) 6433 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6434 #else 6435 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6436 #endif 6437 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6438 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6439 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6440 k = temp_constraints; 6441 if (k > size_of_constraint) k = size_of_constraint; 6442 j = 0; 6443 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6444 valid_constraints = k-j; 6445 total_counts = total_counts-temp_constraints+valid_constraints; 6446 #else 6447 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6448 #endif /* on missing GESVD */ 6449 } 6450 } 6451 } 6452 /* update pointers information */ 6453 if (valid_constraints) { 6454 constraints_n[total_counts_cc] = valid_constraints; 6455 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6456 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6457 /* set change_of_basis flag */ 6458 if (boolforchange) { 6459 PetscBTSet(change_basis,total_counts_cc); 6460 } 6461 total_counts_cc++; 6462 } 6463 } 6464 /* free workspace */ 6465 if (!skip_lapack) { 6466 ierr = PetscFree(work);CHKERRQ(ierr); 6467 #if defined(PETSC_USE_COMPLEX) 6468 ierr = PetscFree(rwork);CHKERRQ(ierr); 6469 #endif 6470 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6471 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6472 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6473 } 6474 for (k=0;k<nnsp_size;k++) { 6475 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6476 } 6477 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6478 /* free index sets of faces, edges and vertices */ 6479 for (i=0;i<n_ISForFaces;i++) { 6480 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6481 } 6482 if (n_ISForFaces) { 6483 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6484 } 6485 for (i=0;i<n_ISForEdges;i++) { 6486 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6487 } 6488 if (n_ISForEdges) { 6489 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6490 } 6491 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6492 } else { 6493 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6494 6495 total_counts = 0; 6496 n_vertices = 0; 6497 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6498 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6499 } 6500 max_constraints = 0; 6501 total_counts_cc = 0; 6502 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6503 total_counts += pcbddc->adaptive_constraints_n[i]; 6504 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6505 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6506 } 6507 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6508 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6509 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6510 constraints_data = pcbddc->adaptive_constraints_data; 6511 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6512 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6513 total_counts_cc = 0; 6514 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6515 if (pcbddc->adaptive_constraints_n[i]) { 6516 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6517 } 6518 } 6519 6520 max_size_of_constraint = 0; 6521 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6522 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6523 /* Change of basis */ 6524 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6525 if (pcbddc->use_change_of_basis) { 6526 for (i=0;i<sub_schurs->n_subs;i++) { 6527 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6528 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6529 } 6530 } 6531 } 6532 } 6533 pcbddc->local_primal_size = total_counts; 6534 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6535 6536 /* map constraints_idxs in boundary numbering */ 6537 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6538 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6539 6540 /* Create constraint matrix */ 6541 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6542 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6543 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6544 6545 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6546 /* determine if a QR strategy is needed for change of basis */ 6547 qr_needed = pcbddc->use_qr_single; 6548 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6549 total_primal_vertices=0; 6550 pcbddc->local_primal_size_cc = 0; 6551 for (i=0;i<total_counts_cc;i++) { 6552 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6553 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6554 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6555 pcbddc->local_primal_size_cc += 1; 6556 } else if (PetscBTLookup(change_basis,i)) { 6557 for (k=0;k<constraints_n[i];k++) { 6558 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6559 } 6560 pcbddc->local_primal_size_cc += constraints_n[i]; 6561 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6562 PetscBTSet(qr_needed_idx,i); 6563 qr_needed = PETSC_TRUE; 6564 } 6565 } else { 6566 pcbddc->local_primal_size_cc += 1; 6567 } 6568 } 6569 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6570 pcbddc->n_vertices = total_primal_vertices; 6571 /* permute indices in order to have a sorted set of vertices */ 6572 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6573 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6574 ierr = PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);CHKERRQ(ierr); 6575 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6576 6577 /* nonzero structure of constraint matrix */ 6578 /* and get reference dof for local constraints */ 6579 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6580 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6581 6582 j = total_primal_vertices; 6583 total_counts = total_primal_vertices; 6584 cum = total_primal_vertices; 6585 for (i=n_vertices;i<total_counts_cc;i++) { 6586 if (!PetscBTLookup(change_basis,i)) { 6587 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6588 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6589 cum++; 6590 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6591 for (k=0;k<constraints_n[i];k++) { 6592 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6593 nnz[j+k] = size_of_constraint; 6594 } 6595 j += constraints_n[i]; 6596 } 6597 } 6598 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6599 ierr = MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6600 ierr = PetscFree(nnz);CHKERRQ(ierr); 6601 6602 /* set values in constraint matrix */ 6603 for (i=0;i<total_primal_vertices;i++) { 6604 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6605 } 6606 total_counts = total_primal_vertices; 6607 for (i=n_vertices;i<total_counts_cc;i++) { 6608 if (!PetscBTLookup(change_basis,i)) { 6609 PetscInt *cols; 6610 6611 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6612 cols = constraints_idxs+constraints_idxs_ptr[i]; 6613 for (k=0;k<constraints_n[i];k++) { 6614 PetscInt row = total_counts+k; 6615 PetscScalar *vals; 6616 6617 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6618 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6619 } 6620 total_counts += constraints_n[i]; 6621 } 6622 } 6623 /* assembling */ 6624 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6625 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6626 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6627 6628 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6629 if (pcbddc->use_change_of_basis) { 6630 /* dual and primal dofs on a single cc */ 6631 PetscInt dual_dofs,primal_dofs; 6632 /* working stuff for GEQRF */ 6633 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6634 PetscBLASInt lqr_work; 6635 /* working stuff for UNGQR */ 6636 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6637 PetscBLASInt lgqr_work; 6638 /* working stuff for TRTRS */ 6639 PetscScalar *trs_rhs = NULL; 6640 PetscBLASInt Blas_NRHS; 6641 /* pointers for values insertion into change of basis matrix */ 6642 PetscInt *start_rows,*start_cols; 6643 PetscScalar *start_vals; 6644 /* working stuff for values insertion */ 6645 PetscBT is_primal; 6646 PetscInt *aux_primal_numbering_B; 6647 /* matrix sizes */ 6648 PetscInt global_size,local_size; 6649 /* temporary change of basis */ 6650 Mat localChangeOfBasisMatrix; 6651 /* extra space for debugging */ 6652 PetscScalar *dbg_work = NULL; 6653 6654 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6655 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6656 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6657 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6658 /* nonzeros for local mat */ 6659 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6660 if (!pcbddc->benign_change || pcbddc->fake_change) { 6661 for (i=0;i<pcis->n;i++) nnz[i]=1; 6662 } else { 6663 const PetscInt *ii; 6664 PetscInt n; 6665 PetscBool flg_row; 6666 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6667 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6668 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6669 } 6670 for (i=n_vertices;i<total_counts_cc;i++) { 6671 if (PetscBTLookup(change_basis,i)) { 6672 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6673 if (PetscBTLookup(qr_needed_idx,i)) { 6674 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6675 } else { 6676 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6677 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6678 } 6679 } 6680 } 6681 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6682 ierr = MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6683 ierr = PetscFree(nnz);CHKERRQ(ierr); 6684 /* Set interior change in the matrix */ 6685 if (!pcbddc->benign_change || pcbddc->fake_change) { 6686 for (i=0;i<pcis->n;i++) { 6687 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6688 } 6689 } else { 6690 const PetscInt *ii,*jj; 6691 PetscScalar *aa; 6692 PetscInt n; 6693 PetscBool flg_row; 6694 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6695 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6696 for (i=0;i<n;i++) { 6697 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6698 } 6699 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6700 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6701 } 6702 6703 if (pcbddc->dbg_flag) { 6704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6706 } 6707 6708 /* Now we loop on the constraints which need a change of basis */ 6709 /* 6710 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6711 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6712 6713 Basic blocks of change of basis matrix T computed by 6714 6715 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6716 6717 | 1 0 ... 0 s_1/S | 6718 | 0 1 ... 0 s_2/S | 6719 | ... | 6720 | 0 ... 1 s_{n-1}/S | 6721 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6722 6723 with S = \sum_{i=1}^n s_i^2 6724 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6725 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6726 6727 - QR decomposition of constraints otherwise 6728 */ 6729 if (qr_needed && max_size_of_constraint) { 6730 /* space to store Q */ 6731 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6732 /* array to store scaling factors for reflectors */ 6733 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6734 /* first we issue queries for optimal work */ 6735 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6736 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6737 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6738 lqr_work = -1; 6739 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6740 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6741 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6742 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6743 lgqr_work = -1; 6744 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6745 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6746 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6747 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6748 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6749 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6750 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6751 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6752 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6753 /* array to store rhs and solution of triangular solver */ 6754 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6755 /* allocating workspace for check */ 6756 if (pcbddc->dbg_flag) { 6757 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6758 } 6759 } 6760 /* array to store whether a node is primal or not */ 6761 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6762 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6763 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6764 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6765 for (i=0;i<total_primal_vertices;i++) { 6766 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6767 } 6768 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6769 6770 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6771 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6772 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6773 if (PetscBTLookup(change_basis,total_counts)) { 6774 /* get constraint info */ 6775 primal_dofs = constraints_n[total_counts]; 6776 dual_dofs = size_of_constraint-primal_dofs; 6777 6778 if (pcbddc->dbg_flag) { 6779 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6780 } 6781 6782 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6783 6784 /* copy quadrature constraints for change of basis check */ 6785 if (pcbddc->dbg_flag) { 6786 ierr = PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6787 } 6788 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6789 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6790 6791 /* compute QR decomposition of constraints */ 6792 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6793 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6794 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6795 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6796 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6797 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6798 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6799 6800 /* explicitly compute R^-T */ 6801 ierr = PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);CHKERRQ(ierr); 6802 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6803 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6804 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6805 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6806 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6807 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6808 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6809 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6810 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6811 6812 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6813 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6814 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6815 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6817 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6818 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6819 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6820 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6821 6822 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6823 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6824 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6825 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6826 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6827 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6828 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6829 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6830 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6831 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6832 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6833 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6834 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6835 6836 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6837 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6838 /* insert cols for primal dofs */ 6839 for (j=0;j<primal_dofs;j++) { 6840 start_vals = &qr_basis[j*size_of_constraint]; 6841 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6842 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6843 } 6844 /* insert cols for dual dofs */ 6845 for (j=0,k=0;j<dual_dofs;k++) { 6846 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6847 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6848 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6849 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6850 j++; 6851 } 6852 } 6853 6854 /* check change of basis */ 6855 if (pcbddc->dbg_flag) { 6856 PetscInt ii,jj; 6857 PetscBool valid_qr=PETSC_TRUE; 6858 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6859 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6860 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6861 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6862 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6863 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6864 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6865 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6866 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6867 for (jj=0;jj<size_of_constraint;jj++) { 6868 for (ii=0;ii<primal_dofs;ii++) { 6869 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6870 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6871 } 6872 } 6873 if (!valid_qr) { 6874 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6875 for (jj=0;jj<size_of_constraint;jj++) { 6876 for (ii=0;ii<primal_dofs;ii++) { 6877 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6878 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6879 } 6880 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6881 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6882 } 6883 } 6884 } 6885 } else { 6886 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6887 } 6888 } 6889 } else { /* simple transformation block */ 6890 PetscInt row,col; 6891 PetscScalar val,norm; 6892 6893 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6894 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6895 for (j=0;j<size_of_constraint;j++) { 6896 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6897 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6898 if (!PetscBTLookup(is_primal,row_B)) { 6899 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6900 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6901 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6902 } else { 6903 for (k=0;k<size_of_constraint;k++) { 6904 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6905 if (row != col) { 6906 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6907 } else { 6908 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6909 } 6910 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6911 } 6912 } 6913 } 6914 if (pcbddc->dbg_flag) { 6915 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6916 } 6917 } 6918 } else { 6919 if (pcbddc->dbg_flag) { 6920 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6921 } 6922 } 6923 } 6924 6925 /* free workspace */ 6926 if (qr_needed) { 6927 if (pcbddc->dbg_flag) { 6928 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6929 } 6930 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6931 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6932 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6933 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6934 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6935 } 6936 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6937 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6938 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6939 6940 /* assembling of global change of variable */ 6941 if (!pcbddc->fake_change) { 6942 Mat tmat; 6943 PetscInt bs; 6944 6945 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6946 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6947 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6948 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6949 ierr = MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6950 ierr = MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6951 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6952 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6953 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6954 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6955 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6956 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6957 ierr = MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6958 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6959 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6960 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6961 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6962 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6963 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6964 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6965 6966 /* check */ 6967 if (pcbddc->dbg_flag) { 6968 PetscReal error; 6969 Vec x,x_change; 6970 6971 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6972 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6973 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6974 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6975 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6976 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6977 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6978 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6979 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6980 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6981 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6982 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6983 if (error > PETSC_SMALL) { 6984 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6985 } 6986 ierr = VecDestroy(&x);CHKERRQ(ierr); 6987 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6988 } 6989 /* adapt sub_schurs computed (if any) */ 6990 if (pcbddc->use_deluxe_scaling) { 6991 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6992 6993 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6994 if (sub_schurs && sub_schurs->S_Ej_all) { 6995 Mat S_new,tmat; 6996 IS is_all_N,is_V_Sall = NULL; 6997 6998 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6999 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 7000 if (pcbddc->deluxe_zerorows) { 7001 ISLocalToGlobalMapping NtoSall; 7002 IS is_V; 7003 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 7004 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 7005 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 7006 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 7007 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 7008 } 7009 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 7010 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7011 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 7012 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7013 if (pcbddc->deluxe_zerorows) { 7014 const PetscScalar *array; 7015 const PetscInt *idxs_V,*idxs_all; 7016 PetscInt i,n_V; 7017 7018 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7019 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 7020 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7021 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7022 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 7023 for (i=0;i<n_V;i++) { 7024 PetscScalar val; 7025 PetscInt idx; 7026 7027 idx = idxs_V[i]; 7028 val = array[idxs_all[idxs_V[i]]]; 7029 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 7030 } 7031 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7032 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7033 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 7034 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7035 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7036 } 7037 sub_schurs->S_Ej_all = S_new; 7038 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7039 if (sub_schurs->sum_S_Ej_all) { 7040 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7041 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 7042 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7043 if (pcbddc->deluxe_zerorows) { 7044 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7045 } 7046 sub_schurs->sum_S_Ej_all = S_new; 7047 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7048 } 7049 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 7050 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 7051 } 7052 /* destroy any change of basis context in sub_schurs */ 7053 if (sub_schurs && sub_schurs->change) { 7054 PetscInt i; 7055 7056 for (i=0;i<sub_schurs->n_subs;i++) { 7057 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 7058 } 7059 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 7060 } 7061 } 7062 if (pcbddc->switch_static) { /* need to save the local change */ 7063 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7064 } else { 7065 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 7066 } 7067 /* determine if any process has changed the pressures locally */ 7068 pcbddc->change_interior = pcbddc->benign_have_null; 7069 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7070 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 7071 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7072 pcbddc->use_qr_single = qr_needed; 7073 } 7074 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7075 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7076 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 7077 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7078 } else { 7079 Mat benign_global = NULL; 7080 if (pcbddc->benign_have_null) { 7081 Mat M; 7082 7083 pcbddc->change_interior = PETSC_TRUE; 7084 ierr = VecCopy(matis->counter,pcis->vec1_N);CHKERRQ(ierr); 7085 ierr = VecReciprocal(pcis->vec1_N);CHKERRQ(ierr); 7086 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);CHKERRQ(ierr); 7087 if (pcbddc->benign_change) { 7088 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 7089 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 7090 } else { 7091 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);CHKERRQ(ierr); 7092 ierr = MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);CHKERRQ(ierr); 7093 } 7094 ierr = MatISSetLocalMat(benign_global,M);CHKERRQ(ierr); 7095 ierr = MatDestroy(&M);CHKERRQ(ierr); 7096 ierr = MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7097 ierr = MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7098 } 7099 if (pcbddc->user_ChangeOfBasisMatrix) { 7100 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 7101 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 7102 } else if (pcbddc->benign_have_null) { 7103 pcbddc->ChangeOfBasisMatrix = benign_global; 7104 } 7105 } 7106 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7107 IS is_global; 7108 const PetscInt *gidxs; 7109 7110 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7111 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 7112 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7113 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 7114 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 7115 } 7116 } 7117 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7118 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 7119 } 7120 7121 if (!pcbddc->fake_change) { 7122 /* add pressure dofs to set of primal nodes for numbering purposes */ 7123 for (i=0;i<pcbddc->benign_n;i++) { 7124 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7125 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7126 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7127 pcbddc->local_primal_size_cc++; 7128 pcbddc->local_primal_size++; 7129 } 7130 7131 /* check if a new primal space has been introduced (also take into account benign trick) */ 7132 pcbddc->new_primal_space_local = PETSC_TRUE; 7133 if (olocal_primal_size == pcbddc->local_primal_size) { 7134 ierr = PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7135 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7136 if (!pcbddc->new_primal_space_local) { 7137 ierr = PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7138 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7139 } 7140 } 7141 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7142 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 7143 } 7144 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 7145 7146 /* flush dbg viewer */ 7147 if (pcbddc->dbg_flag) { 7148 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7149 } 7150 7151 /* free workspace */ 7152 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 7153 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 7154 if (!pcbddc->adaptive_selection) { 7155 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 7156 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 7157 } else { 7158 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 7159 pcbddc->adaptive_constraints_idxs_ptr, 7160 pcbddc->adaptive_constraints_data_ptr, 7161 pcbddc->adaptive_constraints_idxs, 7162 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 7163 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 7164 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 7165 } 7166 PetscFunctionReturn(0); 7167 } 7168 7169 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7170 { 7171 ISLocalToGlobalMapping map; 7172 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7173 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7174 PetscInt i,N; 7175 PetscBool rcsr = PETSC_FALSE; 7176 PetscErrorCode ierr; 7177 7178 PetscFunctionBegin; 7179 if (pcbddc->recompute_topography) { 7180 pcbddc->graphanalyzed = PETSC_FALSE; 7181 /* Reset previously computed graph */ 7182 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 7183 /* Init local Graph struct */ 7184 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 7185 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 7186 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 7187 7188 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7189 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7190 } 7191 /* Check validity of the csr graph passed in by the user */ 7192 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7193 7194 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7195 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7196 PetscInt *xadj,*adjncy; 7197 PetscInt nvtxs; 7198 PetscBool flg_row=PETSC_FALSE; 7199 7200 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7201 if (flg_row) { 7202 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 7203 pcbddc->computed_rowadj = PETSC_TRUE; 7204 } 7205 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7206 rcsr = PETSC_TRUE; 7207 } 7208 if (pcbddc->dbg_flag) { 7209 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7210 } 7211 7212 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7213 PetscReal *lcoords; 7214 PetscInt n; 7215 MPI_Datatype dimrealtype; 7216 7217 /* TODO: support for blocked */ 7218 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7219 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7220 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 7221 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRMPI(ierr); 7222 ierr = MPI_Type_commit(&dimrealtype);CHKERRMPI(ierr); 7223 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);CHKERRQ(ierr); 7224 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);CHKERRQ(ierr); 7225 ierr = MPI_Type_free(&dimrealtype);CHKERRMPI(ierr); 7226 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 7227 7228 pcbddc->mat_graph->coords = lcoords; 7229 pcbddc->mat_graph->cloc = PETSC_TRUE; 7230 pcbddc->mat_graph->cnloc = n; 7231 } 7232 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7233 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7234 7235 /* Setup of Graph */ 7236 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7237 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7238 7239 /* attach info on disconnected subdomains if present */ 7240 if (pcbddc->n_local_subs) { 7241 PetscInt *local_subs,n,totn; 7242 7243 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7244 ierr = PetscMalloc1(n,&local_subs);CHKERRQ(ierr); 7245 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7246 for (i=0;i<pcbddc->n_local_subs;i++) { 7247 const PetscInt *idxs; 7248 PetscInt nl,j; 7249 7250 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 7251 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7252 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7253 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7254 } 7255 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7256 pcbddc->mat_graph->n_local_subs = totn + 1; 7257 pcbddc->mat_graph->local_subs = local_subs; 7258 } 7259 } 7260 7261 if (!pcbddc->graphanalyzed) { 7262 /* Graph's connected components analysis */ 7263 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 7264 pcbddc->graphanalyzed = PETSC_TRUE; 7265 pcbddc->corner_selected = pcbddc->corner_selection; 7266 } 7267 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7268 PetscFunctionReturn(0); 7269 } 7270 7271 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7272 { 7273 PetscInt i,j,n; 7274 PetscScalar *alphas; 7275 PetscReal norm,*onorms; 7276 PetscErrorCode ierr; 7277 7278 PetscFunctionBegin; 7279 n = *nio; 7280 if (!n) PetscFunctionReturn(0); 7281 ierr = PetscMalloc2(n,&alphas,n,&onorms);CHKERRQ(ierr); 7282 ierr = VecNormalize(vecs[0],&norm);CHKERRQ(ierr); 7283 if (norm < PETSC_SMALL) { 7284 onorms[0] = 0.0; 7285 ierr = VecSet(vecs[0],0.0);CHKERRQ(ierr); 7286 } else { 7287 onorms[0] = norm; 7288 } 7289 7290 for (i=1;i<n;i++) { 7291 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 7292 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7293 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 7294 ierr = VecNormalize(vecs[i],&norm);CHKERRQ(ierr); 7295 if (norm < PETSC_SMALL) { 7296 onorms[i] = 0.0; 7297 ierr = VecSet(vecs[i],0.0);CHKERRQ(ierr); 7298 } else { 7299 onorms[i] = norm; 7300 } 7301 } 7302 /* push nonzero vectors at the beginning */ 7303 for (i=0;i<n;i++) { 7304 if (onorms[i] == 0.0) { 7305 for (j=i+1;j<n;j++) { 7306 if (onorms[j] != 0.0) { 7307 ierr = VecCopy(vecs[j],vecs[i]);CHKERRQ(ierr); 7308 onorms[j] = 0.0; 7309 } 7310 } 7311 } 7312 } 7313 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7314 ierr = PetscFree2(alphas,onorms);CHKERRQ(ierr); 7315 PetscFunctionReturn(0); 7316 } 7317 7318 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7319 { 7320 Mat A; 7321 PetscInt n_neighs,*neighs,*n_shared,**shared; 7322 PetscMPIInt size,rank,color; 7323 PetscInt *xadj,*adjncy; 7324 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7325 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7326 PetscInt void_procs,*procs_candidates = NULL; 7327 PetscInt xadj_count,*count; 7328 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7329 PetscSubcomm psubcomm; 7330 MPI_Comm subcomm; 7331 PetscErrorCode ierr; 7332 7333 PetscFunctionBegin; 7334 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7335 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7336 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7337 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7338 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7339 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7340 7341 if (have_void) *have_void = PETSC_FALSE; 7342 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRMPI(ierr); 7343 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 7344 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7345 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7346 im_active = !!n; 7347 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 7348 void_procs = size - active_procs; 7349 /* get ranks of of non-active processes in mat communicator */ 7350 if (void_procs) { 7351 PetscInt ncand; 7352 7353 if (have_void) *have_void = PETSC_TRUE; 7354 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7355 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 7356 for (i=0,ncand=0;i<size;i++) { 7357 if (!procs_candidates[i]) { 7358 procs_candidates[ncand++] = i; 7359 } 7360 } 7361 /* force n_subdomains to be not greater that the number of non-active processes */ 7362 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7363 } 7364 7365 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7366 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7367 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7368 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7369 PetscInt issize,isidx,dest; 7370 if (*n_subdomains == 1) dest = 0; 7371 else dest = rank; 7372 if (im_active) { 7373 issize = 1; 7374 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7375 isidx = procs_candidates[dest]; 7376 } else { 7377 isidx = dest; 7378 } 7379 } else { 7380 issize = 0; 7381 isidx = -1; 7382 } 7383 if (*n_subdomains != 1) *n_subdomains = active_procs; 7384 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7385 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7386 PetscFunctionReturn(0); 7387 } 7388 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7389 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7390 threshold = PetscMax(threshold,2); 7391 7392 /* Get info on mapping */ 7393 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7394 7395 /* build local CSR graph of subdomains' connectivity */ 7396 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7397 xadj[0] = 0; 7398 xadj[1] = PetscMax(n_neighs-1,0); 7399 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7400 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7401 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7402 for (i=1;i<n_neighs;i++) 7403 for (j=0;j<n_shared[i];j++) 7404 count[shared[i][j]] += 1; 7405 7406 xadj_count = 0; 7407 for (i=1;i<n_neighs;i++) { 7408 for (j=0;j<n_shared[i];j++) { 7409 if (count[shared[i][j]] < threshold) { 7410 adjncy[xadj_count] = neighs[i]; 7411 adjncy_wgt[xadj_count] = n_shared[i]; 7412 xadj_count++; 7413 break; 7414 } 7415 } 7416 } 7417 xadj[1] = xadj_count; 7418 ierr = PetscFree(count);CHKERRQ(ierr); 7419 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7420 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7421 7422 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7423 7424 /* Restrict work on active processes only */ 7425 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7426 if (void_procs) { 7427 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7428 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7429 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7430 subcomm = PetscSubcommChild(psubcomm); 7431 } else { 7432 psubcomm = NULL; 7433 subcomm = PetscObjectComm((PetscObject)mat); 7434 } 7435 7436 v_wgt = NULL; 7437 if (!color) { 7438 ierr = PetscFree(xadj);CHKERRQ(ierr); 7439 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7440 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7441 } else { 7442 Mat subdomain_adj; 7443 IS new_ranks,new_ranks_contig; 7444 MatPartitioning partitioner; 7445 PetscInt rstart=0,rend=0; 7446 PetscInt *is_indices,*oldranks; 7447 PetscMPIInt size; 7448 PetscBool aggregate; 7449 7450 ierr = MPI_Comm_size(subcomm,&size);CHKERRMPI(ierr); 7451 if (void_procs) { 7452 PetscInt prank = rank; 7453 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7454 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRMPI(ierr); 7455 for (i=0;i<xadj[1];i++) { 7456 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7457 } 7458 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7459 } else { 7460 oldranks = NULL; 7461 } 7462 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7463 if (aggregate) { /* TODO: all this part could be made more efficient */ 7464 PetscInt lrows,row,ncols,*cols; 7465 PetscMPIInt nrank; 7466 PetscScalar *vals; 7467 7468 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRMPI(ierr); 7469 lrows = 0; 7470 if (nrank<redprocs) { 7471 lrows = size/redprocs; 7472 if (nrank<size%redprocs) lrows++; 7473 } 7474 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7475 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7476 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7477 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7478 row = nrank; 7479 ncols = xadj[1]-xadj[0]; 7480 cols = adjncy; 7481 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7482 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7483 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7484 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7485 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7486 ierr = PetscFree(xadj);CHKERRQ(ierr); 7487 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7488 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7489 ierr = PetscFree(vals);CHKERRQ(ierr); 7490 if (use_vwgt) { 7491 Vec v; 7492 const PetscScalar *array; 7493 PetscInt nl; 7494 7495 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7496 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7497 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7498 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7499 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7500 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7501 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7502 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7503 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7504 ierr = VecDestroy(&v);CHKERRQ(ierr); 7505 } 7506 } else { 7507 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7508 if (use_vwgt) { 7509 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7510 v_wgt[0] = n; 7511 } 7512 } 7513 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7514 7515 /* Partition */ 7516 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7517 #if defined(PETSC_HAVE_PTSCOTCH) 7518 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);CHKERRQ(ierr); 7519 #elif defined(PETSC_HAVE_PARMETIS) 7520 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);CHKERRQ(ierr); 7521 #else 7522 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);CHKERRQ(ierr); 7523 #endif 7524 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7525 if (v_wgt) { 7526 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7527 } 7528 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7529 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7530 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7531 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7532 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7533 7534 /* renumber new_ranks to avoid "holes" in new set of processors */ 7535 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7536 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7537 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7538 if (!aggregate) { 7539 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7540 if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7541 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7542 } else if (oldranks) { 7543 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7544 } else { 7545 ranks_send_to_idx[0] = is_indices[0]; 7546 } 7547 } else { 7548 PetscInt idx = 0; 7549 PetscMPIInt tag; 7550 MPI_Request *reqs; 7551 7552 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7553 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7554 for (i=rstart;i<rend;i++) { 7555 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRMPI(ierr); 7556 } 7557 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRMPI(ierr); 7558 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 7559 ierr = PetscFree(reqs);CHKERRQ(ierr); 7560 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7561 if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7562 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7563 } else if (oldranks) { 7564 ranks_send_to_idx[0] = oldranks[idx]; 7565 } else { 7566 ranks_send_to_idx[0] = idx; 7567 } 7568 } 7569 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7570 /* clean up */ 7571 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7572 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7573 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7574 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7575 } 7576 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7577 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7578 7579 /* assemble parallel IS for sends */ 7580 i = 1; 7581 if (!color) i=0; 7582 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7583 PetscFunctionReturn(0); 7584 } 7585 7586 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7587 7588 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7589 { 7590 Mat local_mat; 7591 IS is_sends_internal; 7592 PetscInt rows,cols,new_local_rows; 7593 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7594 PetscBool ismatis,isdense,newisdense,destroy_mat; 7595 ISLocalToGlobalMapping l2gmap; 7596 PetscInt* l2gmap_indices; 7597 const PetscInt* is_indices; 7598 MatType new_local_type; 7599 /* buffers */ 7600 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7601 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7602 PetscInt *recv_buffer_idxs_local; 7603 PetscScalar *ptr_vals,*recv_buffer_vals; 7604 const PetscScalar *send_buffer_vals; 7605 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7606 /* MPI */ 7607 MPI_Comm comm,comm_n; 7608 PetscSubcomm subcomm; 7609 PetscMPIInt n_sends,n_recvs,size; 7610 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7611 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7612 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7613 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7614 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7615 PetscErrorCode ierr; 7616 7617 PetscFunctionBegin; 7618 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7619 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7620 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7621 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7622 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7623 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7624 PetscValidLogicalCollectiveBool(mat,reuse,6); 7625 PetscValidLogicalCollectiveInt(mat,nis,8); 7626 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7627 if (nvecs) { 7628 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7629 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7630 } 7631 /* further checks */ 7632 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7633 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7634 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7635 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7636 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7637 if (reuse && *mat_n) { 7638 PetscInt mrows,mcols,mnrows,mncols; 7639 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7640 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7641 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7642 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7643 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7644 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7645 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7646 } 7647 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7648 PetscValidLogicalCollectiveInt(mat,bs,1); 7649 7650 /* prepare IS for sending if not provided */ 7651 if (!is_sends) { 7652 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7653 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7654 } else { 7655 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7656 is_sends_internal = is_sends; 7657 } 7658 7659 /* get comm */ 7660 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7661 7662 /* compute number of sends */ 7663 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7664 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7665 7666 /* compute number of receives */ 7667 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 7668 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 7669 ierr = PetscArrayzero(iflags,size);CHKERRQ(ierr); 7670 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7671 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7672 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7673 ierr = PetscFree(iflags);CHKERRQ(ierr); 7674 7675 /* restrict comm if requested */ 7676 subcomm = NULL; 7677 destroy_mat = PETSC_FALSE; 7678 if (restrict_comm) { 7679 PetscMPIInt color,subcommsize; 7680 7681 color = 0; 7682 if (restrict_full) { 7683 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7684 } else { 7685 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7686 } 7687 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr); 7688 subcommsize = size - subcommsize; 7689 /* check if reuse has been requested */ 7690 if (reuse) { 7691 if (*mat_n) { 7692 PetscMPIInt subcommsize2; 7693 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRMPI(ierr); 7694 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7695 comm_n = PetscObjectComm((PetscObject)*mat_n); 7696 } else { 7697 comm_n = PETSC_COMM_SELF; 7698 } 7699 } else { /* MAT_INITIAL_MATRIX */ 7700 PetscMPIInt rank; 7701 7702 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 7703 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7704 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7705 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7706 comm_n = PetscSubcommChild(subcomm); 7707 } 7708 /* flag to destroy *mat_n if not significative */ 7709 if (color) destroy_mat = PETSC_TRUE; 7710 } else { 7711 comm_n = comm; 7712 } 7713 7714 /* prepare send/receive buffers */ 7715 ierr = PetscMalloc1(size,&ilengths_idxs);CHKERRQ(ierr); 7716 ierr = PetscArrayzero(ilengths_idxs,size);CHKERRQ(ierr); 7717 ierr = PetscMalloc1(size,&ilengths_vals);CHKERRQ(ierr); 7718 ierr = PetscArrayzero(ilengths_vals,size);CHKERRQ(ierr); 7719 if (nis) { 7720 ierr = PetscCalloc1(size,&ilengths_idxs_is);CHKERRQ(ierr); 7721 } 7722 7723 /* Get data from local matrices */ 7724 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7725 /* TODO: See below some guidelines on how to prepare the local buffers */ 7726 /* 7727 send_buffer_vals should contain the raw values of the local matrix 7728 send_buffer_idxs should contain: 7729 - MatType_PRIVATE type 7730 - PetscInt size_of_l2gmap 7731 - PetscInt global_row_indices[size_of_l2gmap] 7732 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7733 */ 7734 else { 7735 ierr = MatDenseGetArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7736 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7737 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7738 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7739 send_buffer_idxs[1] = i; 7740 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7741 ierr = PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);CHKERRQ(ierr); 7742 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7743 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7744 for (i=0;i<n_sends;i++) { 7745 ilengths_vals[is_indices[i]] = len*len; 7746 ilengths_idxs[is_indices[i]] = len+2; 7747 } 7748 } 7749 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7750 /* additional is (if any) */ 7751 if (nis) { 7752 PetscMPIInt psum; 7753 PetscInt j; 7754 for (j=0,psum=0;j<nis;j++) { 7755 PetscInt plen; 7756 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7757 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7758 psum += len+1; /* indices + lenght */ 7759 } 7760 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7761 for (j=0,psum=0;j<nis;j++) { 7762 PetscInt plen; 7763 const PetscInt *is_array_idxs; 7764 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7765 send_buffer_idxs_is[psum] = plen; 7766 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7767 ierr = PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);CHKERRQ(ierr); 7768 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7769 psum += plen+1; /* indices + lenght */ 7770 } 7771 for (i=0;i<n_sends;i++) { 7772 ilengths_idxs_is[is_indices[i]] = psum; 7773 } 7774 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7775 } 7776 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7777 7778 buf_size_idxs = 0; 7779 buf_size_vals = 0; 7780 buf_size_idxs_is = 0; 7781 buf_size_vecs = 0; 7782 for (i=0;i<n_recvs;i++) { 7783 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7784 buf_size_vals += (PetscInt)olengths_vals[i]; 7785 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7786 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7787 } 7788 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7789 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7790 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7791 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7792 7793 /* get new tags for clean communications */ 7794 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7795 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7796 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7797 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7798 7799 /* allocate for requests */ 7800 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7801 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7802 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7803 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7804 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7805 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7806 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7807 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7808 7809 /* communications */ 7810 ptr_idxs = recv_buffer_idxs; 7811 ptr_vals = recv_buffer_vals; 7812 ptr_idxs_is = recv_buffer_idxs_is; 7813 ptr_vecs = recv_buffer_vecs; 7814 for (i=0;i<n_recvs;i++) { 7815 source_dest = onodes[i]; 7816 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRMPI(ierr); 7817 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRMPI(ierr); 7818 ptr_idxs += olengths_idxs[i]; 7819 ptr_vals += olengths_vals[i]; 7820 if (nis) { 7821 source_dest = onodes_is[i]; 7822 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRMPI(ierr); 7823 ptr_idxs_is += olengths_idxs_is[i]; 7824 } 7825 if (nvecs) { 7826 source_dest = onodes[i]; 7827 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRMPI(ierr); 7828 ptr_vecs += olengths_idxs[i]-2; 7829 } 7830 } 7831 for (i=0;i<n_sends;i++) { 7832 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7833 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRMPI(ierr); 7834 ierr = MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRMPI(ierr); 7835 if (nis) { 7836 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRMPI(ierr); 7837 } 7838 if (nvecs) { 7839 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7840 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRMPI(ierr); 7841 } 7842 } 7843 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7844 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7845 7846 /* assemble new l2g map */ 7847 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 7848 ptr_idxs = recv_buffer_idxs; 7849 new_local_rows = 0; 7850 for (i=0;i<n_recvs;i++) { 7851 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7852 ptr_idxs += olengths_idxs[i]; 7853 } 7854 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7855 ptr_idxs = recv_buffer_idxs; 7856 new_local_rows = 0; 7857 for (i=0;i<n_recvs;i++) { 7858 ierr = PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));CHKERRQ(ierr); 7859 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7860 ptr_idxs += olengths_idxs[i]; 7861 } 7862 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7863 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7864 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7865 7866 /* infer new local matrix type from received local matrices type */ 7867 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7868 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7869 if (n_recvs) { 7870 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7871 ptr_idxs = recv_buffer_idxs; 7872 for (i=0;i<n_recvs;i++) { 7873 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7874 new_local_type_private = MATAIJ_PRIVATE; 7875 break; 7876 } 7877 ptr_idxs += olengths_idxs[i]; 7878 } 7879 switch (new_local_type_private) { 7880 case MATDENSE_PRIVATE: 7881 new_local_type = MATSEQAIJ; 7882 bs = 1; 7883 break; 7884 case MATAIJ_PRIVATE: 7885 new_local_type = MATSEQAIJ; 7886 bs = 1; 7887 break; 7888 case MATBAIJ_PRIVATE: 7889 new_local_type = MATSEQBAIJ; 7890 break; 7891 case MATSBAIJ_PRIVATE: 7892 new_local_type = MATSEQSBAIJ; 7893 break; 7894 default: 7895 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7896 } 7897 } else { /* by default, new_local_type is seqaij */ 7898 new_local_type = MATSEQAIJ; 7899 bs = 1; 7900 } 7901 7902 /* create MATIS object if needed */ 7903 if (!reuse) { 7904 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7905 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7906 } else { 7907 /* it also destroys the local matrices */ 7908 if (*mat_n) { 7909 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7910 } else { /* this is a fake object */ 7911 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7912 } 7913 } 7914 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7915 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7916 7917 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 7918 7919 /* Global to local map of received indices */ 7920 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7921 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7922 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7923 7924 /* restore attributes -> type of incoming data and its size */ 7925 buf_size_idxs = 0; 7926 for (i=0;i<n_recvs;i++) { 7927 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7928 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7929 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7930 } 7931 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7932 7933 /* set preallocation */ 7934 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7935 if (!newisdense) { 7936 PetscInt *new_local_nnz=NULL; 7937 7938 ptr_idxs = recv_buffer_idxs_local; 7939 if (n_recvs) { 7940 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7941 } 7942 for (i=0;i<n_recvs;i++) { 7943 PetscInt j; 7944 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7945 for (j=0;j<*(ptr_idxs+1);j++) { 7946 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7947 } 7948 } else { 7949 /* TODO */ 7950 } 7951 ptr_idxs += olengths_idxs[i]; 7952 } 7953 if (new_local_nnz) { 7954 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7955 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7956 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7957 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7958 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7959 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7960 } else { 7961 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7962 } 7963 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7964 } else { 7965 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7966 } 7967 7968 /* set values */ 7969 ptr_vals = recv_buffer_vals; 7970 ptr_idxs = recv_buffer_idxs_local; 7971 for (i=0;i<n_recvs;i++) { 7972 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7973 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7974 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7975 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7976 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7977 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7978 } else { 7979 /* TODO */ 7980 } 7981 ptr_idxs += olengths_idxs[i]; 7982 ptr_vals += olengths_vals[i]; 7983 } 7984 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7985 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7986 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7987 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7988 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7989 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7990 7991 #if 0 7992 if (!restrict_comm) { /* check */ 7993 Vec lvec,rvec; 7994 PetscReal infty_error; 7995 7996 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7997 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7998 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7999 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 8000 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 8001 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8002 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);CHKERRQ(ierr); 8003 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 8004 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 8005 } 8006 #endif 8007 8008 /* assemble new additional is (if any) */ 8009 if (nis) { 8010 PetscInt **temp_idxs,*count_is,j,psum; 8011 8012 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8013 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 8014 ptr_idxs = recv_buffer_idxs_is; 8015 psum = 0; 8016 for (i=0;i<n_recvs;i++) { 8017 for (j=0;j<nis;j++) { 8018 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8019 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8020 psum += plen; 8021 ptr_idxs += plen+1; /* shift pointer to received data */ 8022 } 8023 } 8024 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 8025 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 8026 for (i=1;i<nis;i++) { 8027 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 8028 } 8029 ierr = PetscArrayzero(count_is,nis);CHKERRQ(ierr); 8030 ptr_idxs = recv_buffer_idxs_is; 8031 for (i=0;i<n_recvs;i++) { 8032 for (j=0;j<nis;j++) { 8033 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8034 ierr = PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);CHKERRQ(ierr); 8035 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8036 ptr_idxs += plen+1; /* shift pointer to received data */ 8037 } 8038 } 8039 for (i=0;i<nis;i++) { 8040 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8041 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr); 8042 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8043 } 8044 ierr = PetscFree(count_is);CHKERRQ(ierr); 8045 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 8046 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 8047 } 8048 /* free workspace */ 8049 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 8050 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8051 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 8052 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8053 if (isdense) { 8054 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 8055 ierr = MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 8056 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 8057 } else { 8058 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 8059 } 8060 if (nis) { 8061 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8062 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 8063 } 8064 8065 if (nvecs) { 8066 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8067 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8068 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8069 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8070 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 8071 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 8072 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 8073 /* set values */ 8074 ptr_vals = recv_buffer_vecs; 8075 ptr_idxs = recv_buffer_idxs_local; 8076 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8077 for (i=0;i<n_recvs;i++) { 8078 PetscInt j; 8079 for (j=0;j<*(ptr_idxs+1);j++) { 8080 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8081 } 8082 ptr_idxs += olengths_idxs[i]; 8083 ptr_vals += olengths_idxs[i]-2; 8084 } 8085 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8086 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 8087 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 8088 } 8089 8090 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 8091 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 8092 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 8093 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 8094 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 8095 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 8096 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 8097 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 8098 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 8099 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 8100 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 8101 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 8102 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 8103 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 8104 ierr = PetscFree(onodes);CHKERRQ(ierr); 8105 if (nis) { 8106 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 8107 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 8108 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 8109 } 8110 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 8111 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 8112 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 8113 for (i=0;i<nis;i++) { 8114 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8115 } 8116 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8117 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8118 } 8119 *mat_n = NULL; 8120 } 8121 PetscFunctionReturn(0); 8122 } 8123 8124 /* temporary hack into ksp private data structure */ 8125 #include <petsc/private/kspimpl.h> 8126 8127 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8128 { 8129 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8130 PC_IS *pcis = (PC_IS*)pc->data; 8131 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8132 Mat coarsedivudotp = NULL; 8133 Mat coarseG,t_coarse_mat_is; 8134 MatNullSpace CoarseNullSpace = NULL; 8135 ISLocalToGlobalMapping coarse_islg; 8136 IS coarse_is,*isarray,corners; 8137 PetscInt i,im_active=-1,active_procs=-1; 8138 PetscInt nis,nisdofs,nisneu,nisvert; 8139 PetscInt coarse_eqs_per_proc; 8140 PC pc_temp; 8141 PCType coarse_pc_type; 8142 KSPType coarse_ksp_type; 8143 PetscBool multilevel_requested,multilevel_allowed; 8144 PetscBool coarse_reuse; 8145 PetscInt ncoarse,nedcfield; 8146 PetscBool compute_vecs = PETSC_FALSE; 8147 PetscScalar *array; 8148 MatReuse coarse_mat_reuse; 8149 PetscBool restr, full_restr, have_void; 8150 PetscMPIInt size; 8151 PetscErrorCode ierr; 8152 8153 PetscFunctionBegin; 8154 ierr = PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8155 /* Assign global numbering to coarse dofs */ 8156 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8157 PetscInt ocoarse_size; 8158 compute_vecs = PETSC_TRUE; 8159 8160 pcbddc->new_primal_space = PETSC_TRUE; 8161 ocoarse_size = pcbddc->coarse_size; 8162 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 8163 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 8164 /* see if we can avoid some work */ 8165 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8166 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8167 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8168 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 8169 coarse_reuse = PETSC_FALSE; 8170 } else { /* we can safely reuse already computed coarse matrix */ 8171 coarse_reuse = PETSC_TRUE; 8172 } 8173 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8174 coarse_reuse = PETSC_FALSE; 8175 } 8176 /* reset any subassembling information */ 8177 if (!coarse_reuse || pcbddc->recompute_topography) { 8178 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8179 } 8180 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8181 coarse_reuse = PETSC_TRUE; 8182 } 8183 if (coarse_reuse && pcbddc->coarse_ksp) { 8184 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 8185 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 8186 coarse_mat_reuse = MAT_REUSE_MATRIX; 8187 } else { 8188 coarse_mat = NULL; 8189 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8190 } 8191 8192 /* creates temporary l2gmap and IS for coarse indexes */ 8193 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 8194 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 8195 8196 /* creates temporary MATIS object for coarse matrix */ 8197 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 8198 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 8199 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 8200 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8201 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8202 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 8203 8204 /* count "active" (i.e. with positive local size) and "void" processes */ 8205 im_active = !!(pcis->n); 8206 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 8207 8208 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8209 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8210 /* full_restr : just use the receivers from the subassembling pattern */ 8211 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr); 8212 coarse_mat_is = NULL; 8213 multilevel_allowed = PETSC_FALSE; 8214 multilevel_requested = PETSC_FALSE; 8215 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8216 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8217 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8218 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8219 if (multilevel_requested) { 8220 ncoarse = active_procs/pcbddc->coarsening_ratio; 8221 restr = PETSC_FALSE; 8222 full_restr = PETSC_FALSE; 8223 } else { 8224 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8225 restr = PETSC_TRUE; 8226 full_restr = PETSC_TRUE; 8227 } 8228 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8229 ncoarse = PetscMax(1,ncoarse); 8230 if (!pcbddc->coarse_subassembling) { 8231 if (pcbddc->coarsening_ratio > 1) { 8232 if (multilevel_requested) { 8233 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8234 } else { 8235 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8236 } 8237 } else { 8238 PetscMPIInt rank; 8239 8240 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr); 8241 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8242 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8243 } 8244 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8245 PetscInt psum; 8246 if (pcbddc->coarse_ksp) psum = 1; 8247 else psum = 0; 8248 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 8249 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8250 } 8251 /* determine if we can go multilevel */ 8252 if (multilevel_requested) { 8253 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8254 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8255 } 8256 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8257 8258 /* dump subassembling pattern */ 8259 if (pcbddc->dbg_flag && multilevel_allowed) { 8260 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 8261 } 8262 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8263 nedcfield = -1; 8264 corners = NULL; 8265 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8266 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8267 const PetscInt *idxs; 8268 ISLocalToGlobalMapping tmap; 8269 8270 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8271 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 8272 /* allocate space for temporary storage */ 8273 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 8274 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 8275 /* allocate for IS array */ 8276 nisdofs = pcbddc->n_ISForDofsLocal; 8277 if (pcbddc->nedclocal) { 8278 if (pcbddc->nedfield > -1) { 8279 nedcfield = pcbddc->nedfield; 8280 } else { 8281 nedcfield = 0; 8282 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 8283 nisdofs = 1; 8284 } 8285 } 8286 nisneu = !!pcbddc->NeumannBoundariesLocal; 8287 nisvert = 0; /* nisvert is not used */ 8288 nis = nisdofs + nisneu + nisvert; 8289 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 8290 /* dofs splitting */ 8291 for (i=0;i<nisdofs;i++) { 8292 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 8293 if (nedcfield != i) { 8294 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 8295 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8296 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8297 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8298 } else { 8299 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 8300 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8301 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8302 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 8303 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8304 } 8305 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8306 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8307 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 8308 } 8309 /* neumann boundaries */ 8310 if (pcbddc->NeumannBoundariesLocal) { 8311 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 8312 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 8313 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8314 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8315 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8316 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8317 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 8318 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 8319 } 8320 /* coordinates */ 8321 if (pcbddc->corner_selected) { 8322 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8323 ierr = ISGetLocalSize(corners,&tsize);CHKERRQ(ierr); 8324 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8325 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8326 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout); 8327 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8328 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8329 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8330 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);CHKERRQ(ierr); 8331 } 8332 ierr = PetscFree(tidxs);CHKERRQ(ierr); 8333 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 8334 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 8335 } else { 8336 nis = 0; 8337 nisdofs = 0; 8338 nisneu = 0; 8339 nisvert = 0; 8340 isarray = NULL; 8341 } 8342 /* destroy no longer needed map */ 8343 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 8344 8345 /* subassemble */ 8346 if (multilevel_allowed) { 8347 Vec vp[1]; 8348 PetscInt nvecs = 0; 8349 PetscBool reuse,reuser; 8350 8351 if (coarse_mat) reuse = PETSC_TRUE; 8352 else reuse = PETSC_FALSE; 8353 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 8354 vp[0] = NULL; 8355 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8356 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8357 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8358 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8359 nvecs = 1; 8360 8361 if (pcbddc->divudotp) { 8362 Mat B,loc_divudotp; 8363 Vec v,p; 8364 IS dummy; 8365 PetscInt np; 8366 8367 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8368 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8369 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8370 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8371 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8372 ierr = VecSet(p,1.);CHKERRQ(ierr); 8373 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8374 ierr = VecDestroy(&p);CHKERRQ(ierr); 8375 ierr = MatDestroy(&B);CHKERRQ(ierr); 8376 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8377 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8378 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8379 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8380 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8381 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8382 ierr = VecDestroy(&v);CHKERRQ(ierr); 8383 } 8384 } 8385 if (reuser) { 8386 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8387 } else { 8388 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8389 } 8390 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8391 PetscScalar *arraym; 8392 const PetscScalar *arrayv; 8393 PetscInt nl; 8394 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8395 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8396 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8397 ierr = VecGetArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8398 ierr = PetscArraycpy(arraym,arrayv,nl);CHKERRQ(ierr); 8399 ierr = VecRestoreArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8400 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8401 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8402 } else { 8403 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8404 } 8405 } else { 8406 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8407 } 8408 if (coarse_mat_is || coarse_mat) { 8409 if (!multilevel_allowed) { 8410 ierr = MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8411 } else { 8412 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8413 if (coarse_mat_is) { 8414 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8415 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8416 coarse_mat = coarse_mat_is; 8417 } 8418 } 8419 } 8420 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8421 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8422 8423 /* create local to global scatters for coarse problem */ 8424 if (compute_vecs) { 8425 PetscInt lrows; 8426 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8427 if (coarse_mat) { 8428 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8429 } else { 8430 lrows = 0; 8431 } 8432 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8433 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8434 ierr = VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);CHKERRQ(ierr); 8435 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8436 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8437 } 8438 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8439 8440 /* set defaults for coarse KSP and PC */ 8441 if (multilevel_allowed) { 8442 coarse_ksp_type = KSPRICHARDSON; 8443 coarse_pc_type = PCBDDC; 8444 } else { 8445 coarse_ksp_type = KSPPREONLY; 8446 coarse_pc_type = PCREDUNDANT; 8447 } 8448 8449 /* print some info if requested */ 8450 if (pcbddc->dbg_flag) { 8451 if (!multilevel_allowed) { 8452 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8453 if (multilevel_requested) { 8454 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8455 } else if (pcbddc->max_levels) { 8456 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);CHKERRQ(ierr); 8457 } 8458 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8459 } 8460 } 8461 8462 /* communicate coarse discrete gradient */ 8463 coarseG = NULL; 8464 if (pcbddc->nedcG && multilevel_allowed) { 8465 MPI_Comm ccomm; 8466 if (coarse_mat) { 8467 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8468 } else { 8469 ccomm = MPI_COMM_NULL; 8470 } 8471 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8472 } 8473 8474 /* create the coarse KSP object only once with defaults */ 8475 if (coarse_mat) { 8476 PetscBool isredundant,isbddc,force,valid; 8477 PetscViewer dbg_viewer = NULL; 8478 8479 if (pcbddc->dbg_flag) { 8480 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8481 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8482 } 8483 if (!pcbddc->coarse_ksp) { 8484 char prefix[256],str_level[16]; 8485 size_t len; 8486 8487 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8488 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8489 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8490 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8491 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8492 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8493 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8494 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8495 /* TODO is this logic correct? should check for coarse_mat type */ 8496 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8497 /* prefix */ 8498 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8499 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8500 if (!pcbddc->current_level) { 8501 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8502 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8503 } else { 8504 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8505 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8506 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8507 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8508 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8509 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8510 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8511 } 8512 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8513 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8514 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8515 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8516 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8517 /* allow user customization */ 8518 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8519 /* get some info after set from options */ 8520 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8521 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8522 force = PETSC_FALSE; 8523 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8524 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8525 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8526 if (multilevel_allowed && !force && !valid) { 8527 isbddc = PETSC_TRUE; 8528 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8529 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8530 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8531 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8532 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8533 ierr = PetscObjectOptionsBegin((PetscObject)pc_temp);CHKERRQ(ierr); 8534 ierr = (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);CHKERRQ(ierr); 8535 ierr = PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);CHKERRQ(ierr); 8536 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8537 pc_temp->setfromoptionscalled++; 8538 } 8539 } 8540 } 8541 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8542 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8543 if (nisdofs) { 8544 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8545 for (i=0;i<nisdofs;i++) { 8546 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8547 } 8548 } 8549 if (nisneu) { 8550 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8551 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8552 } 8553 if (nisvert) { 8554 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8555 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8556 } 8557 if (coarseG) { 8558 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8559 } 8560 8561 /* get some info after set from options */ 8562 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8563 8564 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8565 if (isbddc && !multilevel_allowed) { 8566 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8567 } 8568 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8569 force = PETSC_FALSE; 8570 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8571 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8572 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8573 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8574 } 8575 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8576 if (isredundant) { 8577 KSP inner_ksp; 8578 PC inner_pc; 8579 8580 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8581 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8582 } 8583 8584 /* parameters which miss an API */ 8585 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8586 if (isbddc) { 8587 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8588 8589 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8590 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8591 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8592 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8593 if (pcbddc_coarse->benign_saddle_point) { 8594 Mat coarsedivudotp_is; 8595 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8596 IS row,col; 8597 const PetscInt *gidxs; 8598 PetscInt n,st,M,N; 8599 8600 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8601 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRMPI(ierr); 8602 st = st-n; 8603 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8604 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8605 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8606 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8607 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8608 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8609 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8610 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8611 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8612 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8613 ierr = ISDestroy(&row);CHKERRQ(ierr); 8614 ierr = ISDestroy(&col);CHKERRQ(ierr); 8615 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8616 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8617 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8618 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8619 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8620 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8621 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8622 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8623 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8624 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8625 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8626 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8627 } 8628 } 8629 8630 /* propagate symmetry info of coarse matrix */ 8631 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8632 if (pc->pmat->symmetric_set) { 8633 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8634 } 8635 if (pc->pmat->hermitian_set) { 8636 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8637 } 8638 if (pc->pmat->spd_set) { 8639 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8640 } 8641 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8642 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8643 } 8644 /* set operators */ 8645 ierr = MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");CHKERRQ(ierr); 8646 ierr = MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 8647 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8648 if (pcbddc->dbg_flag) { 8649 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8650 } 8651 } 8652 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8653 ierr = PetscFree(isarray);CHKERRQ(ierr); 8654 #if 0 8655 { 8656 PetscViewer viewer; 8657 char filename[256]; 8658 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8659 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8660 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8661 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8662 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8663 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8664 } 8665 #endif 8666 8667 if (corners) { 8668 Vec gv; 8669 IS is; 8670 const PetscInt *idxs; 8671 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8672 PetscScalar *coords; 8673 8674 if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8675 ierr = VecGetSize(pcbddc->coarse_vec,&N);CHKERRQ(ierr); 8676 ierr = VecGetLocalSize(pcbddc->coarse_vec,&n);CHKERRQ(ierr); 8677 ierr = VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);CHKERRQ(ierr); 8678 ierr = VecSetBlockSize(gv,cdim);CHKERRQ(ierr); 8679 ierr = VecSetSizes(gv,n*cdim,N*cdim);CHKERRQ(ierr); 8680 ierr = VecSetType(gv,VECSTANDARD);CHKERRQ(ierr); 8681 ierr = VecSetFromOptions(gv);CHKERRQ(ierr); 8682 ierr = VecSet(gv,PETSC_MAX_REAL);CHKERRQ(ierr); /* we only propagate coordinates from vertices constraints */ 8683 8684 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8685 ierr = ISGetLocalSize(is,&n);CHKERRQ(ierr); 8686 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 8687 ierr = PetscMalloc1(n*cdim,&coords);CHKERRQ(ierr); 8688 for (i=0;i<n;i++) { 8689 for (d=0;d<cdim;d++) { 8690 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8691 } 8692 } 8693 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 8694 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8695 8696 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 8697 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8698 ierr = VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);CHKERRQ(ierr); 8699 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8700 ierr = PetscFree(coords);CHKERRQ(ierr); 8701 ierr = VecAssemblyBegin(gv);CHKERRQ(ierr); 8702 ierr = VecAssemblyEnd(gv);CHKERRQ(ierr); 8703 ierr = VecGetArray(gv,&coords);CHKERRQ(ierr); 8704 if (pcbddc->coarse_ksp) { 8705 PC coarse_pc; 8706 PetscBool isbddc; 8707 8708 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 8709 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 8710 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8711 PetscReal *realcoords; 8712 8713 ierr = VecGetLocalSize(gv,&n);CHKERRQ(ierr); 8714 #if defined(PETSC_USE_COMPLEX) 8715 ierr = PetscMalloc1(n,&realcoords);CHKERRQ(ierr); 8716 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8717 #else 8718 realcoords = coords; 8719 #endif 8720 ierr = PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);CHKERRQ(ierr); 8721 #if defined(PETSC_USE_COMPLEX) 8722 ierr = PetscFree(realcoords);CHKERRQ(ierr); 8723 #endif 8724 } 8725 } 8726 ierr = VecRestoreArray(gv,&coords);CHKERRQ(ierr); 8727 ierr = VecDestroy(&gv);CHKERRQ(ierr); 8728 } 8729 ierr = ISDestroy(&corners);CHKERRQ(ierr); 8730 8731 if (pcbddc->coarse_ksp) { 8732 Vec crhs,csol; 8733 8734 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8735 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8736 if (!csol) { 8737 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8738 } 8739 if (!crhs) { 8740 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8741 } 8742 } 8743 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8744 8745 /* compute null space for coarse solver if the benign trick has been requested */ 8746 if (pcbddc->benign_null) { 8747 8748 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8749 for (i=0;i<pcbddc->benign_n;i++) { 8750 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8751 } 8752 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8753 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8754 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8755 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8756 if (coarse_mat) { 8757 Vec nullv; 8758 PetscScalar *array,*array2; 8759 PetscInt nl; 8760 8761 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8762 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8763 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8764 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8765 ierr = PetscArraycpy(array2,array,nl);CHKERRQ(ierr); 8766 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8767 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8768 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8769 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8770 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8771 } 8772 } 8773 ierr = PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8774 8775 ierr = PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8776 if (pcbddc->coarse_ksp) { 8777 PetscBool ispreonly; 8778 8779 if (CoarseNullSpace) { 8780 PetscBool isnull; 8781 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8782 if (isnull) { 8783 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8784 } 8785 /* TODO: add local nullspaces (if any) */ 8786 } 8787 /* setup coarse ksp */ 8788 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8789 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8790 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8791 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8792 KSP check_ksp; 8793 KSPType check_ksp_type; 8794 PC check_pc; 8795 Vec check_vec,coarse_vec; 8796 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8797 PetscInt its; 8798 PetscBool compute_eigs; 8799 PetscReal *eigs_r,*eigs_c; 8800 PetscInt neigs; 8801 const char *prefix; 8802 8803 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8804 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8805 ierr = PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);CHKERRQ(ierr); 8806 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 8807 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8808 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8809 /* prevent from setup unneeded object */ 8810 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8811 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8812 if (ispreonly) { 8813 check_ksp_type = KSPPREONLY; 8814 compute_eigs = PETSC_FALSE; 8815 } else { 8816 check_ksp_type = KSPGMRES; 8817 compute_eigs = PETSC_TRUE; 8818 } 8819 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8820 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8821 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8822 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8823 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8824 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8825 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8826 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8827 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8828 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8829 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8830 /* create random vec */ 8831 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8832 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8833 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8834 /* solve coarse problem */ 8835 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8836 ierr = KSPCheckSolve(check_ksp,pc,coarse_vec);CHKERRQ(ierr); 8837 /* set eigenvalue estimation if preonly has not been requested */ 8838 if (compute_eigs) { 8839 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8840 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8841 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8842 if (neigs) { 8843 lambda_max = eigs_r[neigs-1]; 8844 lambda_min = eigs_r[0]; 8845 if (pcbddc->use_coarse_estimates) { 8846 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8847 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8848 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8849 } 8850 } 8851 } 8852 } 8853 8854 /* check coarse problem residual error */ 8855 if (pcbddc->dbg_flag) { 8856 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8857 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8858 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8859 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8860 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8861 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8862 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8863 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8864 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8865 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8866 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8867 if (CoarseNullSpace) { 8868 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8869 } 8870 if (compute_eigs) { 8871 PetscReal lambda_max_s,lambda_min_s; 8872 KSPConvergedReason reason; 8873 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8874 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8875 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8876 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8877 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8878 for (i=0;i<neigs;i++) { 8879 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8880 } 8881 } 8882 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8883 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8884 } 8885 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8886 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8887 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8888 if (compute_eigs) { 8889 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8890 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8891 } 8892 } 8893 } 8894 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8895 /* print additional info */ 8896 if (pcbddc->dbg_flag) { 8897 /* waits until all processes reaches this point */ 8898 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8899 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);CHKERRQ(ierr); 8900 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8901 } 8902 8903 /* free memory */ 8904 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8905 ierr = PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8906 PetscFunctionReturn(0); 8907 } 8908 8909 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8910 { 8911 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8912 PC_IS* pcis = (PC_IS*)pc->data; 8913 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8914 IS subset,subset_mult,subset_n; 8915 PetscInt local_size,coarse_size=0; 8916 PetscInt *local_primal_indices=NULL; 8917 const PetscInt *t_local_primal_indices; 8918 PetscErrorCode ierr; 8919 8920 PetscFunctionBegin; 8921 /* Compute global number of coarse dofs */ 8922 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8923 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8924 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8925 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8926 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8927 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8928 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8929 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8930 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8931 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8932 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8933 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8934 ierr = PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);CHKERRQ(ierr); 8935 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8936 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8937 8938 /* check numbering */ 8939 if (pcbddc->dbg_flag) { 8940 PetscScalar coarsesum,*array,*array2; 8941 PetscInt i; 8942 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8943 8944 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8945 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8946 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8947 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8948 /* counter */ 8949 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8950 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8951 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8952 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8953 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8954 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8955 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8956 for (i=0;i<pcbddc->local_primal_size;i++) { 8957 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8958 } 8959 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8960 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8961 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8962 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8963 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8964 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8965 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8966 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8967 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8968 for (i=0;i<pcis->n;i++) { 8969 if (array[i] != 0.0 && array[i] != array2[i]) { 8970 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8971 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8972 set_error = PETSC_TRUE; 8973 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8974 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8975 } 8976 } 8977 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8978 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 8979 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8980 for (i=0;i<pcis->n;i++) { 8981 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8982 } 8983 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8984 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8985 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8986 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8987 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8988 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8989 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8990 PetscInt *gidxs; 8991 8992 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8993 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8994 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8995 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8996 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8997 for (i=0;i<pcbddc->local_primal_size;i++) { 8998 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8999 } 9000 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9001 ierr = PetscFree(gidxs);CHKERRQ(ierr); 9002 } 9003 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9004 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9005 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 9006 } 9007 9008 /* get back data */ 9009 *coarse_size_n = coarse_size; 9010 *local_primal_indices_n = local_primal_indices; 9011 PetscFunctionReturn(0); 9012 } 9013 9014 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 9015 { 9016 IS localis_t; 9017 PetscInt i,lsize,*idxs,n; 9018 PetscScalar *vals; 9019 PetscErrorCode ierr; 9020 9021 PetscFunctionBegin; 9022 /* get indices in local ordering exploiting local to global map */ 9023 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 9024 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 9025 for (i=0;i<lsize;i++) vals[i] = 1.0; 9026 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9027 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 9028 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 9029 if (idxs) { /* multilevel guard */ 9030 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 9031 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 9032 } 9033 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 9034 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9035 ierr = PetscFree(vals);CHKERRQ(ierr); 9036 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 9037 /* now compute set in local ordering */ 9038 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9039 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9040 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9041 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 9042 for (i=0,lsize=0;i<n;i++) { 9043 if (PetscRealPart(vals[i]) > 0.5) { 9044 lsize++; 9045 } 9046 } 9047 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 9048 for (i=0,lsize=0;i<n;i++) { 9049 if (PetscRealPart(vals[i]) > 0.5) { 9050 idxs[lsize++] = i; 9051 } 9052 } 9053 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9054 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 9055 *localis = localis_t; 9056 PetscFunctionReturn(0); 9057 } 9058 9059 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9060 { 9061 PC_IS *pcis=(PC_IS*)pc->data; 9062 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9063 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9064 Mat S_j; 9065 PetscInt *used_xadj,*used_adjncy; 9066 PetscBool free_used_adj; 9067 PetscErrorCode ierr; 9068 9069 PetscFunctionBegin; 9070 ierr = PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9071 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9072 free_used_adj = PETSC_FALSE; 9073 if (pcbddc->sub_schurs_layers == -1) { 9074 used_xadj = NULL; 9075 used_adjncy = NULL; 9076 } else { 9077 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9078 used_xadj = pcbddc->mat_graph->xadj; 9079 used_adjncy = pcbddc->mat_graph->adjncy; 9080 } else if (pcbddc->computed_rowadj) { 9081 used_xadj = pcbddc->mat_graph->xadj; 9082 used_adjncy = pcbddc->mat_graph->adjncy; 9083 } else { 9084 PetscBool flg_row=PETSC_FALSE; 9085 const PetscInt *xadj,*adjncy; 9086 PetscInt nvtxs; 9087 9088 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9089 if (flg_row) { 9090 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 9091 ierr = PetscArraycpy(used_xadj,xadj,nvtxs+1);CHKERRQ(ierr); 9092 ierr = PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);CHKERRQ(ierr); 9093 free_used_adj = PETSC_TRUE; 9094 } else { 9095 pcbddc->sub_schurs_layers = -1; 9096 used_xadj = NULL; 9097 used_adjncy = NULL; 9098 } 9099 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9100 } 9101 } 9102 9103 /* setup sub_schurs data */ 9104 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9105 if (!sub_schurs->schur_explicit) { 9106 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9107 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9108 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 9109 } else { 9110 Mat change = NULL; 9111 Vec scaling = NULL; 9112 IS change_primal = NULL, iP; 9113 PetscInt benign_n; 9114 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9115 PetscBool need_change = PETSC_FALSE; 9116 PetscBool discrete_harmonic = PETSC_FALSE; 9117 9118 if (!pcbddc->use_vertices && reuse_solvers) { 9119 PetscInt n_vertices; 9120 9121 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 9122 reuse_solvers = (PetscBool)!n_vertices; 9123 } 9124 if (!pcbddc->benign_change_explicit) { 9125 benign_n = pcbddc->benign_n; 9126 } else { 9127 benign_n = 0; 9128 } 9129 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9130 We need a global reduction to avoid possible deadlocks. 9131 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9132 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9133 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9134 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 9135 need_change = (PetscBool)(!need_change); 9136 } 9137 /* If the user defines additional constraints, we import them here. 9138 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 9139 if (need_change) { 9140 PC_IS *pcisf; 9141 PC_BDDC *pcbddcf; 9142 PC pcf; 9143 9144 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9145 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 9146 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 9147 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 9148 9149 /* hacks */ 9150 pcisf = (PC_IS*)pcf->data; 9151 pcisf->is_B_local = pcis->is_B_local; 9152 pcisf->vec1_N = pcis->vec1_N; 9153 pcisf->BtoNmap = pcis->BtoNmap; 9154 pcisf->n = pcis->n; 9155 pcisf->n_B = pcis->n_B; 9156 pcbddcf = (PC_BDDC*)pcf->data; 9157 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 9158 pcbddcf->mat_graph = pcbddc->mat_graph; 9159 pcbddcf->use_faces = PETSC_TRUE; 9160 pcbddcf->use_change_of_basis = PETSC_TRUE; 9161 pcbddcf->use_change_on_faces = PETSC_TRUE; 9162 pcbddcf->use_qr_single = PETSC_TRUE; 9163 pcbddcf->fake_change = PETSC_TRUE; 9164 9165 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 9166 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 9167 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 9168 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 9169 change = pcbddcf->ConstraintMatrix; 9170 pcbddcf->ConstraintMatrix = NULL; 9171 9172 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 9173 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 9174 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 9175 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 9176 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 9177 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 9178 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 9179 pcf->ops->destroy = NULL; 9180 pcf->ops->reset = NULL; 9181 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 9182 } 9183 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9184 9185 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 9186 if (iP) { 9187 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 9188 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 9189 ierr = PetscOptionsEnd();CHKERRQ(ierr); 9190 } 9191 if (discrete_harmonic) { 9192 Mat A; 9193 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 9194 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 9195 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 9196 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9197 ierr = MatDestroy(&A);CHKERRQ(ierr); 9198 } else { 9199 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9200 } 9201 ierr = MatDestroy(&change);CHKERRQ(ierr); 9202 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 9203 } 9204 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9205 9206 /* free adjacency */ 9207 if (free_used_adj) { 9208 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 9209 } 9210 ierr = PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9211 PetscFunctionReturn(0); 9212 } 9213 9214 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9215 { 9216 PC_IS *pcis=(PC_IS*)pc->data; 9217 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9218 PCBDDCGraph graph; 9219 PetscErrorCode ierr; 9220 9221 PetscFunctionBegin; 9222 /* attach interface graph for determining subsets */ 9223 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9224 IS verticesIS,verticescomm; 9225 PetscInt vsize,*idxs; 9226 9227 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9228 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 9229 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9230 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 9231 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9232 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9233 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 9234 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 9235 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 9236 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 9237 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 9238 } else { 9239 graph = pcbddc->mat_graph; 9240 } 9241 /* print some info */ 9242 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9243 IS vertices; 9244 PetscInt nv,nedges,nfaces; 9245 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 9246 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9247 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 9248 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9249 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 9250 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);CHKERRQ(ierr); 9251 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);CHKERRQ(ierr); 9252 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);CHKERRQ(ierr); 9253 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9254 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9255 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9256 } 9257 9258 /* sub_schurs init */ 9259 if (!pcbddc->sub_schurs) { 9260 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 9261 } 9262 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 9263 9264 /* free graph struct */ 9265 if (pcbddc->sub_schurs_rebuild) { 9266 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 9267 } 9268 PetscFunctionReturn(0); 9269 } 9270 9271 PetscErrorCode PCBDDCCheckOperator(PC pc) 9272 { 9273 PC_IS *pcis=(PC_IS*)pc->data; 9274 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9275 PetscErrorCode ierr; 9276 9277 PetscFunctionBegin; 9278 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9279 IS zerodiag = NULL; 9280 Mat S_j,B0_B=NULL; 9281 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9282 PetscScalar *p0_check,*array,*array2; 9283 PetscReal norm; 9284 PetscInt i; 9285 9286 /* B0 and B0_B */ 9287 if (zerodiag) { 9288 IS dummy; 9289 9290 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 9291 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 9292 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 9293 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 9294 } 9295 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9296 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 9297 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 9298 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9299 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9300 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9301 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9302 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 9303 /* S_j */ 9304 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9305 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9306 9307 /* mimic vector in \widetilde{W}_\Gamma */ 9308 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 9309 /* continuous in primal space */ 9310 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 9311 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9312 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9313 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9314 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 9315 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9316 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9317 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9318 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9319 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9320 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9321 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9322 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 9323 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 9324 9325 /* assemble rhs for coarse problem */ 9326 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9327 /* local with Schur */ 9328 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 9329 if (zerodiag) { 9330 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9331 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9332 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9333 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 9334 } 9335 /* sum on primal nodes the local contributions */ 9336 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9337 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9338 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9339 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9340 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9341 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9342 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9343 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 9344 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9345 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9346 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9347 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9348 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9349 /* scale primal nodes (BDDC sums contibutions) */ 9350 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 9351 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9352 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9353 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9354 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9355 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9356 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9357 /* global: \widetilde{B0}_B w_\Gamma */ 9358 if (zerodiag) { 9359 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 9360 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9361 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9362 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9363 } 9364 /* BDDC */ 9365 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 9366 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 9367 9368 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 9369 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 9370 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 9371 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);CHKERRQ(ierr); 9372 for (i=0;i<pcbddc->benign_n;i++) { 9373 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));CHKERRQ(ierr); 9374 } 9375 ierr = PetscFree(p0_check);CHKERRQ(ierr); 9376 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 9377 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 9378 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 9379 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9380 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 9381 } 9382 PetscFunctionReturn(0); 9383 } 9384 9385 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9386 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9387 { 9388 Mat At; 9389 IS rows; 9390 PetscInt rst,ren; 9391 PetscErrorCode ierr; 9392 PetscLayout rmap; 9393 9394 PetscFunctionBegin; 9395 rst = ren = 0; 9396 if (ccomm != MPI_COMM_NULL) { 9397 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 9398 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 9399 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 9400 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 9401 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 9402 } 9403 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 9404 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 9405 ierr = ISDestroy(&rows);CHKERRQ(ierr); 9406 9407 if (ccomm != MPI_COMM_NULL) { 9408 Mat_MPIAIJ *a,*b; 9409 IS from,to; 9410 Vec gvec; 9411 PetscInt lsize; 9412 9413 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9414 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9415 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9416 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9417 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9418 a = (Mat_MPIAIJ*)At->data; 9419 b = (Mat_MPIAIJ*)(*B)->data; 9420 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRMPI(ierr); 9421 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRMPI(ierr); 9422 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9423 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9424 b->A = a->A; 9425 b->B = a->B; 9426 9427 b->donotstash = a->donotstash; 9428 b->roworiented = a->roworiented; 9429 b->rowindices = NULL; 9430 b->rowvalues = NULL; 9431 b->getrowactive = PETSC_FALSE; 9432 9433 (*B)->rmap = rmap; 9434 (*B)->factortype = A->factortype; 9435 (*B)->assembled = PETSC_TRUE; 9436 (*B)->insertmode = NOT_SET_VALUES; 9437 (*B)->preallocated = PETSC_TRUE; 9438 9439 if (a->colmap) { 9440 #if defined(PETSC_USE_CTABLE) 9441 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9442 #else 9443 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9444 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9445 ierr = PetscArraycpy(b->colmap,a->colmap,At->cmap->N);CHKERRQ(ierr); 9446 #endif 9447 } else b->colmap = NULL; 9448 if (a->garray) { 9449 PetscInt len; 9450 len = a->B->cmap->n; 9451 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9452 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9453 if (len) { ierr = PetscArraycpy(b->garray,a->garray,len);CHKERRQ(ierr); } 9454 } else b->garray = NULL; 9455 9456 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9457 b->lvec = a->lvec; 9458 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9459 9460 /* cannot use VecScatterCopy */ 9461 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9462 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9463 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9464 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9465 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9466 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9467 ierr = ISDestroy(&from);CHKERRQ(ierr); 9468 ierr = ISDestroy(&to);CHKERRQ(ierr); 9469 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9470 } 9471 ierr = MatDestroy(&At);CHKERRQ(ierr); 9472 PetscFunctionReturn(0); 9473 } 9474