1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 29 30 if (pcbddc->dbg_flag) { 31 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 32 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 33 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 35 } 36 37 if (pcbddc->dbg_flag) { 38 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 39 } 40 41 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 42 43 /* max size of subsets */ 44 mss = 0; 45 for (i=0;i<sub_schurs->n_subs;i++) { 46 PetscInt subset_size; 47 48 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 49 mss = PetscMax(mss,subset_size); 50 } 51 52 /* min/max and threshold */ 53 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 54 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 55 nmax = PetscMax(nmin,nmax); 56 allocated_S_St = PETSC_FALSE; 57 if (nmin) { 58 allocated_S_St = PETSC_TRUE; 59 } 60 61 /* allocate lapack workspace */ 62 cum = cum2 = 0; 63 maxneigs = 0; 64 for (i=0;i<sub_schurs->n_subs;i++) { 65 PetscInt n,subset_size; 66 67 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 68 n = PetscMin(subset_size,nmax); 69 cum += subset_size; 70 cum2 += subset_size*n; 71 maxneigs = PetscMax(maxneigs,n); 72 } 73 if (mss) { 74 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 75 PetscBLASInt B_itype = 1; 76 PetscBLASInt B_N = mss; 77 PetscReal zero = 0.0; 78 PetscReal eps = 0.0; /* dlamch? */ 79 80 B_lwork = -1; 81 S = NULL; 82 St = NULL; 83 eigs = NULL; 84 eigv = NULL; 85 B_iwork = NULL; 86 B_ifail = NULL; 87 #if defined(PETSC_USE_COMPLEX) 88 rwork = NULL; 89 #endif 90 thresh = 1.0; 91 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 92 #if defined(PETSC_USE_COMPLEX) 93 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 94 #else 95 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 96 #endif 97 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 98 ierr = PetscFPTrapPop();CHKERRQ(ierr); 99 } else { 100 /* TODO */ 101 } 102 } else { 103 lwork = 0; 104 } 105 106 nv = 0; 107 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 108 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 109 } 110 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 111 if (allocated_S_St) { 112 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 113 } 114 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 115 #if defined(PETSC_USE_COMPLEX) 116 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 117 #endif 118 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 119 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 121 nv+cum,&pcbddc->adaptive_constraints_idxs, 122 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 123 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 124 125 maxneigs = 0; 126 cum = cum2 = cumarray = 0; 127 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 128 pcbddc->adaptive_constraints_data_ptr[0] = 0; 129 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 130 const PetscInt *idxs; 131 132 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 133 for (cum=0;cum<nv;cum++) { 134 pcbddc->adaptive_constraints_n[cum] = 1; 135 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 136 pcbddc->adaptive_constraints_data[cum] = 1.0; 137 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 138 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 139 } 140 cum2 = cum; 141 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 142 } 143 144 if (mss) { /* multilevel */ 145 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 146 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 147 } 148 149 for (i=0;i<sub_schurs->n_subs;i++) { 150 151 const PetscInt *idxs; 152 PetscReal infty = PETSC_MAX_REAL; 153 PetscInt j,subset_size,eigs_start = 0; 154 PetscBLASInt B_N; 155 PetscBool same_data = PETSC_FALSE; 156 157 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 158 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 159 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 160 if (sub_schurs->is_hermitian) { 161 PetscInt j,k; 162 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 163 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 } 166 for (j=0;j<subset_size;j++) { 167 for (k=j;k<subset_size;k++) { 168 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 169 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 170 } 171 } 172 } else { 173 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 174 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 } 176 } else { 177 S = Sarray + cumarray; 178 St = Starray + cumarray; 179 } 180 181 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 182 /* see if we can save some work */ 183 if (sub_schurs->n_subs == 1) { 184 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 185 } 186 187 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 188 B_neigs = 0; 189 } else { 190 /* Threshold: this is an heuristic for edges */ 191 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 192 193 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 194 PetscBLASInt B_itype = 1; 195 PetscBLASInt B_IL, B_IU; 196 PetscReal eps = -1.0; /* dlamch? */ 197 PetscInt nmin_s; 198 199 /* ask for eigenvalues larger than thresh */ 200 if (pcbddc->dbg_flag) { 201 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 202 } 203 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 204 #if defined(PETSC_USE_COMPLEX) 205 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 206 #else 207 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 208 #endif 209 ierr = PetscFPTrapPop();CHKERRQ(ierr); 210 if (B_ierr) { 211 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 212 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 213 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 214 } 215 216 if (B_neigs > nmax) { 217 if (pcbddc->dbg_flag) { 218 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 219 } 220 eigs_start = B_neigs -nmax; 221 B_neigs = nmax; 222 } 223 224 nmin_s = PetscMin(nmin,B_N); 225 if (B_neigs < nmin_s) { 226 PetscBLASInt B_neigs2; 227 228 B_IU = B_N - B_neigs; 229 B_IL = B_N - nmin_s + 1; 230 if (pcbddc->dbg_flag) { 231 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 232 } 233 if (sub_schurs->is_hermitian) { 234 PetscInt j; 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 for (j=0;j<subset_size;j++) { 239 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 240 } 241 } else { 242 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 244 } 245 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 246 #if defined(PETSC_USE_COMPLEX) 247 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 248 #else 249 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 250 #endif 251 ierr = PetscFPTrapPop();CHKERRQ(ierr); 252 B_neigs += B_neigs2; 253 } 254 if (B_ierr) { 255 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 256 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 257 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 if (pcbddc->dbg_flag) { 260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 261 for (j=0;j<B_neigs;j++) { 262 if (eigs[j] == 0.0) { 263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 264 } else { 265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 266 } 267 } 268 } 269 } else { 270 /* TODO */ 271 } 272 } 273 maxneigs = PetscMax(B_neigs,maxneigs); 274 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 275 if (B_neigs) { 276 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 277 278 if (pcbddc->dbg_flag > 1) { 279 PetscInt ii; 280 for (ii=0;ii<B_neigs;ii++) { 281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 282 for (j=0;j<B_N;j++) { 283 #if defined(PETSC_USE_COMPLEX) 284 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 287 #else 288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 289 #endif 290 } 291 } 292 } 293 #if 0 294 for (j=0;j<B_neigs;j++) { 295 PetscBLASInt Blas_N,Blas_one = 1.0; 296 PetscScalar norm; 297 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 298 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 299 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 300 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 301 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 302 } else { 303 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 304 } 305 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 306 } 307 #endif 308 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 309 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 310 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 311 cum++; 312 } 313 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 314 /* shift for next computation */ 315 cumarray += subset_size*subset_size; 316 } 317 if (pcbddc->dbg_flag) { 318 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 319 } 320 321 if (mss) { 322 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 323 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 324 /* destroy matrices (junk) */ 325 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 326 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 327 } 328 if (allocated_S_St) { 329 ierr = PetscFree2(S,St);CHKERRQ(ierr); 330 } 331 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 332 #if defined(PETSC_USE_COMPLEX) 333 ierr = PetscFree(rwork);CHKERRQ(ierr); 334 #endif 335 if (pcbddc->dbg_flag) { 336 PetscInt maxneigs_r; 337 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 338 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 339 } 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "PCBDDCSetUpSolvers" 345 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 346 { 347 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 348 PetscScalar *coarse_submat_vals; 349 PetscErrorCode ierr; 350 351 PetscFunctionBegin; 352 /* Setup local scatters R_to_B and (optionally) R_to_D */ 353 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 354 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 355 356 /* Setup local neumann solver ksp_R */ 357 /* PCBDDCSetUpLocalScatters should be called first! */ 358 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 359 360 /* Change global null space passed in by the user if change of basis has been requested */ 361 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 362 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 363 } 364 365 /* 366 Setup local correction and local part of coarse basis. 367 Gives back the dense local part of the coarse matrix in column major ordering 368 */ 369 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 370 371 /* Compute total number of coarse nodes and setup coarse solver */ 372 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 373 374 /* free */ 375 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 376 PetscFunctionReturn(0); 377 } 378 379 #undef __FUNCT__ 380 #define __FUNCT__ "PCBDDCResetCustomization" 381 PetscErrorCode PCBDDCResetCustomization(PC pc) 382 { 383 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 384 PetscErrorCode ierr; 385 386 PetscFunctionBegin; 387 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 390 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 392 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 393 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 394 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 395 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 396 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 397 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 #undef __FUNCT__ 402 #define __FUNCT__ "PCBDDCResetTopography" 403 PetscErrorCode PCBDDCResetTopography(PC pc) 404 { 405 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 406 PetscErrorCode ierr; 407 408 PetscFunctionBegin; 409 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 411 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 412 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 413 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "PCBDDCResetSolvers" 419 PetscErrorCode PCBDDCResetSolvers(PC pc) 420 { 421 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 422 PetscScalar *array; 423 PetscErrorCode ierr; 424 425 PetscFunctionBegin; 426 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 427 if (pcbddc->coarse_phi_B) { 428 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 429 ierr = PetscFree(array);CHKERRQ(ierr); 430 } 431 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 432 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 434 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 436 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 437 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 438 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 440 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 441 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 442 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 443 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 444 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 445 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 446 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 447 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 448 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 449 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 450 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 451 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 452 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 459 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 460 { 461 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 462 PC_IS *pcis = (PC_IS*)pc->data; 463 VecType impVecType; 464 PetscInt n_constraints,n_R,old_size; 465 PetscErrorCode ierr; 466 467 PetscFunctionBegin; 468 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 469 /* get sizes */ 470 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 471 n_R = pcis->n-pcbddc->n_vertices; 472 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 473 /* local work vectors (try to avoid unneeded work)*/ 474 /* R nodes */ 475 old_size = -1; 476 if (pcbddc->vec1_R) { 477 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 478 } 479 if (n_R != old_size) { 480 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 481 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 482 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 483 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 484 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 485 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 486 } 487 /* local primal dofs */ 488 old_size = -1; 489 if (pcbddc->vec1_P) { 490 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 491 } 492 if (pcbddc->local_primal_size != old_size) { 493 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 494 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 495 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 496 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 497 } 498 /* local explicit constraints */ 499 old_size = -1; 500 if (pcbddc->vec1_C) { 501 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 502 } 503 if (n_constraints && n_constraints != old_size) { 504 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 505 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 506 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 507 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 508 } 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "PCBDDCSetUpCorrection" 514 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 515 { 516 PetscErrorCode ierr; 517 /* pointers to pcis and pcbddc */ 518 PC_IS* pcis = (PC_IS*)pc->data; 519 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 520 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 521 /* submatrices of local problem */ 522 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 523 /* submatrices of local coarse problem */ 524 Mat S_VV,S_CV,S_VC,S_CC; 525 /* working matrices */ 526 Mat C_CR; 527 /* additional working stuff */ 528 PC pc_R; 529 Mat F; 530 PetscBool isLU,isCHOL,isILU; 531 532 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 533 PetscScalar *work; 534 PetscInt *idx_V_B; 535 PetscInt n,n_vertices,n_constraints; 536 PetscInt i,n_R,n_D,n_B; 537 PetscBool unsymmetric_check; 538 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 539 MatType impMatType; 540 /* some shortcuts to scalars */ 541 PetscScalar one=1.0,m_one=-1.0; 542 543 PetscFunctionBegin; 544 n_vertices = pcbddc->n_vertices; 545 n_constraints = pcbddc->local_primal_size-n_vertices; 546 /* Set Non-overlapping dimensions */ 547 n_B = pcis->n_B; 548 n_D = pcis->n - n_B; 549 n_R = pcis->n - n_vertices; 550 551 /* Set types for local objects needed by BDDC precondtioner */ 552 impMatType = MATSEQDENSE; 553 554 /* vertices in boundary numbering */ 555 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 556 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 557 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 558 559 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 560 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 564 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 566 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 568 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 569 570 unsymmetric_check = PETSC_FALSE; 571 /* allocate workspace */ 572 n = 0; 573 if (n_constraints) { 574 n += n_R*n_constraints; 575 } 576 if (n_vertices) { 577 n = PetscMax(2*n_R*n_vertices,n); 578 n = PetscMax((n_R+n_B)*n_vertices,n); 579 } 580 if (!pcbddc->symmetric_primal) { 581 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 582 unsymmetric_check = PETSC_TRUE; 583 } 584 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 585 586 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 587 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 590 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 591 if (isLU || isILU || isCHOL) { 592 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 593 } else if (sub_schurs->reuse_mumps) { 594 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 595 MatFactorType type; 596 597 F = reuse_mumps->F; 598 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 599 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 600 } else { 601 F = NULL; 602 } 603 604 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 605 if (n_constraints) { 606 Mat M1,M2,M3; 607 Mat auxmat; 608 IS is_aux; 609 PetscScalar *array,*array2; 610 611 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 612 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 613 614 /* Extract constraints on R nodes: C_{CR} */ 615 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 616 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 617 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 618 619 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 620 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 621 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 622 for (i=0;i<n_constraints;i++) { 623 const PetscScalar *row_cmat_values; 624 const PetscInt *row_cmat_indices; 625 PetscInt size_of_constraint,j; 626 627 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 628 for (j=0;j<size_of_constraint;j++) { 629 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 630 } 631 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 632 } 633 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 634 if (F) { 635 Mat B; 636 637 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 638 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 639 ierr = MatDestroy(&B);CHKERRQ(ierr); 640 } else { 641 PetscScalar *marr; 642 643 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 644 for (i=0;i<n_constraints;i++) { 645 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 646 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 647 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 648 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 649 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 650 } 651 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 652 } 653 if (!pcbddc->switch_static) { 654 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 655 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 656 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 660 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 661 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 662 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 663 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 664 } 665 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 666 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 667 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 668 } else { 669 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 670 pcbddc->local_auxmat2 = local_auxmat2_R; 671 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 672 } 673 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 674 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 675 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 676 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 677 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 678 if (isCHOL) { 679 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 680 } else { 681 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 682 } 683 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 684 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 685 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 686 ierr = MatDestroy(&M2);CHKERRQ(ierr); 687 ierr = MatDestroy(&M3);CHKERRQ(ierr); 688 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 689 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 690 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 691 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 692 ierr = MatDestroy(&M1);CHKERRQ(ierr); 693 } 694 /* Get submatrices from subdomain matrix */ 695 if (n_vertices) { 696 IS is_aux; 697 698 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 699 IS tis; 700 701 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 702 ierr = ISSort(tis);CHKERRQ(ierr); 703 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 704 ierr = ISDestroy(&tis);CHKERRQ(ierr); 705 } else { 706 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 707 } 708 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 709 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 710 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 711 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 712 } 713 714 /* Matrix of coarse basis functions (local) */ 715 if (pcbddc->coarse_phi_B) { 716 PetscInt on_B,on_primal,on_D=n_D; 717 if (pcbddc->coarse_phi_D) { 718 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 719 } 720 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 721 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 722 PetscScalar *marray; 723 724 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 725 ierr = PetscFree(marray);CHKERRQ(ierr); 726 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 727 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 728 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 729 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 730 } 731 } 732 733 if (!pcbddc->coarse_phi_B) { 734 PetscScalar *marray; 735 736 n = n_B*pcbddc->local_primal_size; 737 if (pcbddc->switch_static || pcbddc->dbg_flag) { 738 n += n_D*pcbddc->local_primal_size; 739 } 740 if (!pcbddc->symmetric_primal) { 741 n *= 2; 742 } 743 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 745 n = n_B*pcbddc->local_primal_size; 746 if (pcbddc->switch_static || pcbddc->dbg_flag) { 747 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 748 n += n_D*pcbddc->local_primal_size; 749 } 750 if (!pcbddc->symmetric_primal) { 751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 752 if (pcbddc->switch_static || pcbddc->dbg_flag) { 753 n = n_B*pcbddc->local_primal_size; 754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 755 } 756 } else { 757 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 758 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 760 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 761 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 762 } 763 } 764 } 765 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 766 /* vertices */ 767 if (n_vertices) { 768 769 ierr = MatConvert(A_VV,impMatType,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 770 771 if (n_R) { 772 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 773 PetscBLASInt B_N,B_one = 1; 774 PetscScalar *x,*y; 775 PetscBool isseqaij; 776 777 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 778 ierr = MatConvert(A_RV,impMatType,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 780 if (F) { /* TODO could be optimized for symmetric problems */ 781 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 782 } else { 783 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 784 for (i=0;i<n_vertices;i++) { 785 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 786 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 787 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 788 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 789 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 790 } 791 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 792 } 793 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 794 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 795 if (n_constraints) { 796 Mat B; 797 798 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 799 for (i=0;i<n_vertices;i++) { 800 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 801 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 802 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 803 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 804 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 805 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 806 } 807 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 808 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 809 ierr = MatDestroy(&B);CHKERRQ(ierr); 810 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 811 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 812 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 813 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 814 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 815 ierr = MatDestroy(&B);CHKERRQ(ierr); 816 } 817 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 818 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 819 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 820 } 821 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 822 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 823 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 824 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 825 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 826 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 827 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 828 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 829 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 830 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 831 } else { 832 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 833 } 834 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 835 /* coarse basis functions */ 836 for (i=0;i<n_vertices;i++) { 837 PetscScalar *y; 838 839 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 840 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 841 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 842 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 y[n_B*i+idx_V_B[i]] = 1.0; 845 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 846 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 847 848 if (pcbddc->switch_static || pcbddc->dbg_flag) { 849 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 850 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 851 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 852 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 853 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 854 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 855 } 856 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 857 } 858 /* if n_R == 0 the object is not destroyed */ 859 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 860 } 861 862 if (n_constraints) { 863 Mat B; 864 865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 866 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 867 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 868 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 869 if (n_vertices) { 870 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 871 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 872 } else { 873 Mat S_VCt; 874 875 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 876 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 877 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 878 } 879 } 880 ierr = MatDestroy(&B);CHKERRQ(ierr); 881 /* coarse basis functions */ 882 for (i=0;i<n_constraints;i++) { 883 PetscScalar *y; 884 885 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 886 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 887 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 888 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 891 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 892 if (pcbddc->switch_static || pcbddc->dbg_flag) { 893 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 894 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 895 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 898 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 899 } 900 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 901 } 902 } 903 if (n_constraints) { 904 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 905 } 906 907 /* compute other basis functions for non-symmetric problems */ 908 if (!pcbddc->symmetric_primal) { 909 910 if (n_constraints) { 911 Mat S_CCT,B_C; 912 913 /* this is a lazy thing */ 914 ierr = MatConvert(C_CR,impMatType,MAT_INPLACE_MATRIX,&C_CR);CHKERRQ(ierr); 915 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 916 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 917 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 918 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 919 if (n_vertices) { 920 Mat B_V,S_VCT; 921 922 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 923 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 924 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 925 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 926 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 927 } 928 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 929 } else { /* if there are no constraints, reset work */ 930 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 931 } 932 if (n_vertices && n_R) { 933 Mat A_VRT; 934 PetscScalar *marray; 935 PetscBLASInt B_N,B_one = 1; 936 937 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 938 ierr = MatConvert(A_VRT,impMatType,MAT_INPLACE_MATRIX,&A_VRT);CHKERRQ(ierr); 939 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 940 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 941 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 942 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 943 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 944 } 945 946 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 947 for (i=0;i<pcbddc->local_primal_size;i++) { 948 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 949 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 950 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 951 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 952 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 953 } 954 } else { 955 for (i=0;i<pcbddc->local_primal_size;i++) { 956 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 957 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 958 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 959 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 960 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 961 } 962 } 963 /* coarse basis functions */ 964 for (i=0;i<pcbddc->local_primal_size;i++) { 965 PetscScalar *y; 966 967 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 968 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 969 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 970 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 971 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 972 if (i<n_vertices) { 973 y[n_B*i+idx_V_B[i]] = 1.0; 974 } 975 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 976 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 977 978 if (pcbddc->switch_static || pcbddc->dbg_flag) { 979 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 980 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 981 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 982 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 984 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 985 } 986 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 987 } 988 } 989 /* free memory */ 990 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 991 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 992 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 993 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 994 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 995 ierr = PetscFree(work);CHKERRQ(ierr); 996 if (n_vertices) { 997 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 998 } 999 if (n_constraints) { 1000 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1001 } 1002 /* Checking coarse_sub_mat and coarse basis functios */ 1003 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1004 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1005 if (pcbddc->dbg_flag) { 1006 Mat coarse_sub_mat; 1007 Mat AUXMAT,TM1,TM2,TM3,TM4; 1008 Mat coarse_phi_D,coarse_phi_B; 1009 Mat coarse_psi_D,coarse_psi_B; 1010 Mat A_II,A_BB,A_IB,A_BI; 1011 Mat C_B,CPHI; 1012 IS is_dummy; 1013 Vec mones; 1014 MatType checkmattype=MATSEQAIJ; 1015 PetscReal real_value; 1016 1017 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1018 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1019 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1020 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1021 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1022 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1023 if (unsymmetric_check) { 1024 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1025 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1026 } 1027 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1028 1029 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1030 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1032 if (unsymmetric_check) { 1033 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1034 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1035 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1036 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1037 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1038 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1039 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1040 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1041 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1042 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1043 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1044 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1045 } else { 1046 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1047 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1048 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1049 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1050 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1051 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1053 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1054 } 1055 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1056 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1057 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1058 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 1059 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1060 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1061 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1062 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1063 1064 /* check constraints */ 1065 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1066 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1067 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1068 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1069 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1070 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1071 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1073 if (unsymmetric_check) { 1074 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1075 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1076 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1077 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1079 } 1080 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1081 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1082 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1083 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1084 1085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1086 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1087 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1088 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1089 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1090 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1091 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1092 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1093 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1094 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1095 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1096 if (unsymmetric_check) { 1097 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1098 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1099 } 1100 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1101 } 1102 /* get back data */ 1103 *coarse_submat_vals_n = coarse_submat_vals; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1109 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1110 { 1111 Mat *work_mat; 1112 IS isrow_s,iscol_s; 1113 PetscBool rsorted,csorted; 1114 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1115 PetscErrorCode ierr; 1116 1117 PetscFunctionBegin; 1118 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1119 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1120 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1121 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1122 1123 if (!rsorted) { 1124 const PetscInt *idxs; 1125 PetscInt *idxs_sorted,i; 1126 1127 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1128 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1129 for (i=0;i<rsize;i++) { 1130 idxs_perm_r[i] = i; 1131 } 1132 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1133 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1134 for (i=0;i<rsize;i++) { 1135 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1136 } 1137 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1138 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1139 } else { 1140 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1141 isrow_s = isrow; 1142 } 1143 1144 if (!csorted) { 1145 if (isrow == iscol) { 1146 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1147 iscol_s = isrow_s; 1148 } else { 1149 const PetscInt *idxs; 1150 PetscInt *idxs_sorted,i; 1151 1152 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1153 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1154 for (i=0;i<csize;i++) { 1155 idxs_perm_c[i] = i; 1156 } 1157 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1158 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1159 for (i=0;i<csize;i++) { 1160 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1161 } 1162 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1163 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1164 } 1165 } else { 1166 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1167 iscol_s = iscol; 1168 } 1169 1170 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1171 1172 if (!rsorted || !csorted) { 1173 Mat new_mat; 1174 IS is_perm_r,is_perm_c; 1175 1176 if (!rsorted) { 1177 PetscInt *idxs_r,i; 1178 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1179 for (i=0;i<rsize;i++) { 1180 idxs_r[idxs_perm_r[i]] = i; 1181 } 1182 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1183 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1184 } else { 1185 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1186 } 1187 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1188 1189 if (!csorted) { 1190 if (isrow_s == iscol_s) { 1191 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1192 is_perm_c = is_perm_r; 1193 } else { 1194 PetscInt *idxs_c,i; 1195 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1196 for (i=0;i<csize;i++) { 1197 idxs_c[idxs_perm_c[i]] = i; 1198 } 1199 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1200 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1201 } 1202 } else { 1203 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1204 } 1205 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1206 1207 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1208 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1209 work_mat[0] = new_mat; 1210 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1211 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1212 } 1213 1214 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1215 *B = work_mat[0]; 1216 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1217 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1218 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1224 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1225 { 1226 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1227 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1228 Mat new_mat; 1229 IS is_local,is_global; 1230 PetscInt local_size; 1231 PetscBool isseqaij; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1236 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1237 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1238 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1239 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1240 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1241 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1242 1243 /* check */ 1244 if (pcbddc->dbg_flag) { 1245 Vec x,x_change; 1246 PetscReal error; 1247 1248 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1249 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1250 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1251 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1252 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1253 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1254 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1255 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1256 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1257 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1258 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1259 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1260 ierr = VecDestroy(&x);CHKERRQ(ierr); 1261 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1262 } 1263 1264 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1265 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1266 if (isseqaij) { 1267 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1268 } else { 1269 Mat work_mat; 1270 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1271 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1272 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1273 } 1274 if (matis->A->symmetric_set) { 1275 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1276 #if !defined(PETSC_USE_COMPLEX) 1277 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1278 #endif 1279 } 1280 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1286 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1287 { 1288 PC_IS* pcis = (PC_IS*)(pc->data); 1289 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1290 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1291 PetscInt *idx_R_local=NULL; 1292 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1293 PetscInt vbs,bs; 1294 PetscBT bitmask=NULL; 1295 PetscErrorCode ierr; 1296 1297 PetscFunctionBegin; 1298 /* 1299 No need to setup local scatters if 1300 - primal space is unchanged 1301 AND 1302 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1303 AND 1304 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1305 */ 1306 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1307 PetscFunctionReturn(0); 1308 } 1309 /* destroy old objects */ 1310 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1311 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1312 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1313 /* Set Non-overlapping dimensions */ 1314 n_B = pcis->n_B; 1315 n_D = pcis->n - n_B; 1316 n_vertices = pcbddc->n_vertices; 1317 1318 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1319 1320 /* create auxiliary bitmask and allocate workspace */ 1321 if (!sub_schurs->reuse_mumps) { 1322 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1323 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1324 for (i=0;i<n_vertices;i++) { 1325 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1326 } 1327 1328 for (i=0, n_R=0; i<pcis->n; i++) { 1329 if (!PetscBTLookup(bitmask,i)) { 1330 idx_R_local[n_R++] = i; 1331 } 1332 } 1333 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1334 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1335 1336 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1337 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1338 } 1339 1340 /* Block code */ 1341 vbs = 1; 1342 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1343 if (bs>1 && !(n_vertices%bs)) { 1344 PetscBool is_blocked = PETSC_TRUE; 1345 PetscInt *vary; 1346 if (!sub_schurs->reuse_mumps) { 1347 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1348 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1349 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1350 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1351 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1352 for (i=0; i<pcis->n/bs; i++) { 1353 if (vary[i]!=0 && vary[i]!=bs) { 1354 is_blocked = PETSC_FALSE; 1355 break; 1356 } 1357 } 1358 ierr = PetscFree(vary);CHKERRQ(ierr); 1359 } else { 1360 /* Verify directly the R set */ 1361 for (i=0; i<n_R/bs; i++) { 1362 PetscInt j,node=idx_R_local[bs*i]; 1363 for (j=1; j<bs; j++) { 1364 if (node != idx_R_local[bs*i+j]-j) { 1365 is_blocked = PETSC_FALSE; 1366 break; 1367 } 1368 } 1369 } 1370 } 1371 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1372 vbs = bs; 1373 for (i=0;i<n_R/vbs;i++) { 1374 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1375 } 1376 } 1377 } 1378 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1379 if (sub_schurs->reuse_mumps) { 1380 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1381 1382 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1383 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1384 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1385 reuse_mumps->is_R = pcbddc->is_R_local; 1386 } else { 1387 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1388 } 1389 1390 /* print some info if requested */ 1391 if (pcbddc->dbg_flag) { 1392 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1393 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1394 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1395 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1396 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1397 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1398 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1399 } 1400 1401 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1402 if (!sub_schurs->reuse_mumps) { 1403 IS is_aux1,is_aux2; 1404 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1405 1406 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1407 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1408 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1409 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1410 for (i=0; i<n_D; i++) { 1411 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1412 } 1413 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1414 for (i=0, j=0; i<n_R; i++) { 1415 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1416 aux_array1[j++] = i; 1417 } 1418 } 1419 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1420 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1421 for (i=0, j=0; i<n_B; i++) { 1422 if (!PetscBTLookup(bitmask,is_indices[i])) { 1423 aux_array2[j++] = i; 1424 } 1425 } 1426 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1427 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1428 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1429 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1430 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1431 1432 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1433 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1434 for (i=0, j=0; i<n_R; i++) { 1435 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1436 aux_array1[j++] = i; 1437 } 1438 } 1439 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1440 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1441 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1442 } 1443 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1444 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1445 } else { 1446 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1447 IS tis; 1448 PetscInt schur_size; 1449 1450 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1451 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1452 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1453 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1454 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1455 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1456 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1457 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1458 } 1459 } 1460 PetscFunctionReturn(0); 1461 } 1462 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1466 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1467 { 1468 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1469 PC_IS *pcis = (PC_IS*)pc->data; 1470 PC pc_temp; 1471 Mat A_RR; 1472 MatReuse reuse; 1473 PetscScalar m_one = -1.0; 1474 PetscReal value; 1475 PetscInt n_D,n_R; 1476 PetscBool use_exact,use_exact_reduced,issbaij; 1477 PetscErrorCode ierr; 1478 /* prefixes stuff */ 1479 char dir_prefix[256],neu_prefix[256],str_level[16]; 1480 size_t len; 1481 1482 PetscFunctionBegin; 1483 1484 /* compute prefixes */ 1485 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1486 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1487 if (!pcbddc->current_level) { 1488 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1489 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1490 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1491 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1492 } else { 1493 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1494 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1495 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1496 len -= 15; /* remove "pc_bddc_coarse_" */ 1497 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1498 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1499 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1500 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1501 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1502 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1503 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1504 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1505 } 1506 1507 /* DIRICHLET PROBLEM */ 1508 if (dirichlet) { 1509 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1510 if (pcbddc->local_mat->symmetric_set) { 1511 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1512 } 1513 /* Matrix for Dirichlet problem is pcis->A_II */ 1514 n_D = pcis->n - pcis->n_B; 1515 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1516 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1517 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1518 /* default */ 1519 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1520 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1521 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1522 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1523 if (issbaij) { 1524 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1525 } else { 1526 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1527 } 1528 /* Allow user's customization */ 1529 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1530 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1531 } 1532 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1533 if (sub_schurs->reuse_mumps) { 1534 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1535 1536 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1537 } 1538 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1539 if (!n_D) { 1540 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1541 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1542 } 1543 /* Set Up KSP for Dirichlet problem of BDDC */ 1544 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1545 /* set ksp_D into pcis data */ 1546 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1547 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1548 pcis->ksp_D = pcbddc->ksp_D; 1549 } 1550 1551 /* NEUMANN PROBLEM */ 1552 A_RR = 0; 1553 if (neumann) { 1554 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1555 PetscInt ibs,mbs; 1556 PetscBool issbaij; 1557 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1558 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1559 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1560 if (pcbddc->ksp_R) { /* already created ksp */ 1561 PetscInt nn_R; 1562 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1563 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1564 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1565 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1566 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1567 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1568 reuse = MAT_INITIAL_MATRIX; 1569 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1570 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1571 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1572 reuse = MAT_INITIAL_MATRIX; 1573 } else { /* safe to reuse the matrix */ 1574 reuse = MAT_REUSE_MATRIX; 1575 } 1576 } 1577 /* last check */ 1578 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1579 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1580 reuse = MAT_INITIAL_MATRIX; 1581 } 1582 } else { /* first time, so we need to create the matrix */ 1583 reuse = MAT_INITIAL_MATRIX; 1584 } 1585 /* extract A_RR */ 1586 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1587 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1588 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1589 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1590 if (matis->A == pcbddc->local_mat) { 1591 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1592 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1593 } else { 1594 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1595 } 1596 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1597 if (matis->A == pcbddc->local_mat) { 1598 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1599 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1600 } else { 1601 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1602 } 1603 } 1604 if (!sub_schurs->reuse_mumps) { 1605 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1606 if (pcbddc->local_mat->symmetric_set) { 1607 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1608 } 1609 } else { 1610 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1611 1612 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1613 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1614 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1615 } 1616 if (!pcbddc->ksp_R) { /* create object if not present */ 1617 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1618 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1619 /* default */ 1620 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1621 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1622 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1623 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1624 if (issbaij) { 1625 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1626 } else { 1627 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1628 } 1629 /* Allow user's customization */ 1630 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1631 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1632 } 1633 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1634 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1635 if (!n_R) { 1636 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1637 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1638 } 1639 /* Reuse MUMPS solver if it is present */ 1640 if (sub_schurs->reuse_mumps) { 1641 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1642 1643 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1644 } 1645 /* Set Up KSP for Neumann problem of BDDC */ 1646 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1647 } 1648 /* free Neumann problem's matrix */ 1649 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1650 1651 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1652 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1653 if (pcbddc->dbg_flag) { 1654 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1655 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1656 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1657 } 1658 if (dirichlet) { /* Dirichlet */ 1659 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1660 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1661 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1662 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1663 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1664 /* need to be adapted? */ 1665 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1666 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1667 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1668 /* print info */ 1669 if (pcbddc->dbg_flag) { 1670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1671 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1672 } 1673 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1674 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1675 } 1676 } 1677 if (neumann) { /* Neumann */ 1678 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1679 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1680 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1681 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1682 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1683 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1684 /* need to be adapted? */ 1685 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1686 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1687 /* print info */ 1688 if (pcbddc->dbg_flag) { 1689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1690 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1691 } 1692 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1693 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1694 } 1695 } 1696 } 1697 PetscFunctionReturn(0); 1698 } 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1702 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1703 { 1704 PetscErrorCode ierr; 1705 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1706 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1707 1708 PetscFunctionBegin; 1709 if (!sub_schurs->reuse_mumps) { 1710 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1711 } 1712 if (!pcbddc->switch_static) { 1713 if (applytranspose && pcbddc->local_auxmat1) { 1714 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1715 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1716 } 1717 if (!sub_schurs->reuse_mumps) { 1718 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1719 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1720 } else { 1721 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1722 1723 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1724 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 } 1726 } else { 1727 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1728 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1729 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1730 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1731 if (applytranspose && pcbddc->local_auxmat1) { 1732 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1733 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1734 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1735 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1736 } 1737 } 1738 if (!sub_schurs->reuse_mumps) { 1739 if (applytranspose) { 1740 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1741 } else { 1742 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1743 } 1744 } else { 1745 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1746 1747 if (applytranspose) { 1748 ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1749 } else { 1750 ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1751 } 1752 } 1753 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1754 if (!pcbddc->switch_static) { 1755 if (!sub_schurs->reuse_mumps) { 1756 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1757 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1758 } else { 1759 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1760 1761 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1762 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1763 } 1764 if (!applytranspose && pcbddc->local_auxmat1) { 1765 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1766 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1767 } 1768 } else { 1769 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1770 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1771 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1772 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1773 if (!applytranspose && pcbddc->local_auxmat1) { 1774 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1775 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1776 } 1777 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1778 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1781 } 1782 PetscFunctionReturn(0); 1783 } 1784 1785 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1786 #undef __FUNCT__ 1787 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1788 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1789 { 1790 PetscErrorCode ierr; 1791 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1792 PC_IS* pcis = (PC_IS*) (pc->data); 1793 const PetscScalar zero = 0.0; 1794 1795 PetscFunctionBegin; 1796 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1797 if (applytranspose) { 1798 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1799 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1800 } else { 1801 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1802 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1803 } 1804 /* start communications from local primal nodes to rhs of coarse solver */ 1805 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1806 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1807 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1808 1809 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1810 /* TODO remove null space when doing multilevel */ 1811 if (pcbddc->coarse_ksp) { 1812 Vec rhs,sol; 1813 1814 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1815 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1816 if (applytranspose) { 1817 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1818 } else { 1819 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1820 } 1821 } 1822 1823 /* Local solution on R nodes */ 1824 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1825 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1826 } 1827 1828 /* communications from coarse sol to local primal nodes */ 1829 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1830 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1831 1832 /* Sum contributions from two levels */ 1833 if (applytranspose) { 1834 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1835 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1836 } else { 1837 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1838 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1839 } 1840 PetscFunctionReturn(0); 1841 } 1842 1843 #undef __FUNCT__ 1844 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1845 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1846 { 1847 PetscErrorCode ierr; 1848 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1849 PetscScalar *array; 1850 Vec from,to; 1851 1852 PetscFunctionBegin; 1853 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1854 from = pcbddc->coarse_vec; 1855 to = pcbddc->vec1_P; 1856 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1857 Vec tvec; 1858 1859 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1860 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1861 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1862 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1863 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1864 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1865 } 1866 } else { /* from local to global -> put data in coarse right hand side */ 1867 from = pcbddc->vec1_P; 1868 to = pcbddc->coarse_vec; 1869 } 1870 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1876 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1877 { 1878 PetscErrorCode ierr; 1879 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1880 PetscScalar *array; 1881 Vec from,to; 1882 1883 PetscFunctionBegin; 1884 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1885 from = pcbddc->coarse_vec; 1886 to = pcbddc->vec1_P; 1887 } else { /* from local to global -> put data in coarse right hand side */ 1888 from = pcbddc->vec1_P; 1889 to = pcbddc->coarse_vec; 1890 } 1891 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1892 if (smode == SCATTER_FORWARD) { 1893 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1894 Vec tvec; 1895 1896 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1897 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1898 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1899 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1900 } 1901 } else { 1902 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1903 ierr = VecResetArray(from);CHKERRQ(ierr); 1904 } 1905 } 1906 PetscFunctionReturn(0); 1907 } 1908 1909 /* uncomment for testing purposes */ 1910 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1911 #undef __FUNCT__ 1912 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1913 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1914 { 1915 PetscErrorCode ierr; 1916 PC_IS* pcis = (PC_IS*)(pc->data); 1917 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1918 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1919 /* one and zero */ 1920 PetscScalar one=1.0,zero=0.0; 1921 /* space to store constraints and their local indices */ 1922 PetscScalar *constraints_data; 1923 PetscInt *constraints_idxs,*constraints_idxs_B; 1924 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1925 PetscInt *constraints_n; 1926 /* iterators */ 1927 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1928 /* BLAS integers */ 1929 PetscBLASInt lwork,lierr; 1930 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1931 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1932 /* reuse */ 1933 PetscInt olocal_primal_size,olocal_primal_size_cc; 1934 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1935 /* change of basis */ 1936 PetscBool qr_needed; 1937 PetscBT change_basis,qr_needed_idx; 1938 /* auxiliary stuff */ 1939 PetscInt *nnz,*is_indices; 1940 PetscInt ncc; 1941 /* some quantities */ 1942 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1943 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1944 1945 PetscFunctionBegin; 1946 /* Destroy Mat objects computed previously */ 1947 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1948 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1949 /* save info on constraints from previous setup (if any) */ 1950 olocal_primal_size = pcbddc->local_primal_size; 1951 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1952 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1953 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1954 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1955 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1956 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1957 1958 /* print some info */ 1959 if (pcbddc->dbg_flag) { 1960 IS vertices; 1961 PetscInt nv,nedges,nfaces; 1962 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1963 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1964 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1965 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1966 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1967 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1968 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1969 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1970 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1971 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1972 } 1973 1974 if (!pcbddc->adaptive_selection) { 1975 IS ISForVertices,*ISForFaces,*ISForEdges; 1976 MatNullSpace nearnullsp; 1977 const Vec *nearnullvecs; 1978 Vec *localnearnullsp; 1979 PetscScalar *array; 1980 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1981 PetscBool nnsp_has_cnst; 1982 /* LAPACK working arrays for SVD or POD */ 1983 PetscBool skip_lapack,boolforchange; 1984 PetscScalar *work; 1985 PetscReal *singular_vals; 1986 #if defined(PETSC_USE_COMPLEX) 1987 PetscReal *rwork; 1988 #endif 1989 #if defined(PETSC_MISSING_LAPACK_GESVD) 1990 PetscScalar *temp_basis,*correlation_mat; 1991 #else 1992 PetscBLASInt dummy_int=1; 1993 PetscScalar dummy_scalar=1.; 1994 #endif 1995 1996 /* Get index sets for faces, edges and vertices from graph */ 1997 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1998 /* free unneeded index sets */ 1999 if (!pcbddc->use_vertices) { 2000 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2001 } 2002 if (!pcbddc->use_edges) { 2003 for (i=0;i<n_ISForEdges;i++) { 2004 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2005 } 2006 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2007 n_ISForEdges = 0; 2008 } 2009 if (!pcbddc->use_faces) { 2010 for (i=0;i<n_ISForFaces;i++) { 2011 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2012 } 2013 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2014 n_ISForFaces = 0; 2015 } 2016 2017 #if defined(PETSC_USE_DEBUG) 2018 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2019 Also use_change_of_basis should be consistent among processors */ 2020 if (pcbddc->NullSpace) { 2021 PetscBool tbool[2],gbool[2]; 2022 2023 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2024 pcbddc->use_change_of_basis = PETSC_TRUE; 2025 if (!ISForEdges) { 2026 pcbddc->use_change_on_faces = PETSC_TRUE; 2027 } 2028 } 2029 tbool[0] = pcbddc->use_change_of_basis; 2030 tbool[1] = pcbddc->use_change_on_faces; 2031 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2032 pcbddc->use_change_of_basis = gbool[0]; 2033 pcbddc->use_change_on_faces = gbool[1]; 2034 } 2035 #endif 2036 2037 /* check if near null space is attached to global mat */ 2038 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2039 if (nearnullsp) { 2040 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2041 /* remove any stored info */ 2042 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2043 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2044 /* store information for BDDC solver reuse */ 2045 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2046 pcbddc->onearnullspace = nearnullsp; 2047 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2048 for (i=0;i<nnsp_size;i++) { 2049 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2050 } 2051 } else { /* if near null space is not provided BDDC uses constants by default */ 2052 nnsp_size = 0; 2053 nnsp_has_cnst = PETSC_TRUE; 2054 } 2055 /* get max number of constraints on a single cc */ 2056 max_constraints = nnsp_size; 2057 if (nnsp_has_cnst) max_constraints++; 2058 2059 /* 2060 Evaluate maximum storage size needed by the procedure 2061 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2062 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2063 There can be multiple constraints per connected component 2064 */ 2065 n_vertices = 0; 2066 if (ISForVertices) { 2067 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2068 } 2069 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2070 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2071 2072 total_counts = n_ISForFaces+n_ISForEdges; 2073 total_counts *= max_constraints; 2074 total_counts += n_vertices; 2075 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2076 2077 total_counts = 0; 2078 max_size_of_constraint = 0; 2079 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2080 IS used_is; 2081 if (i<n_ISForEdges) { 2082 used_is = ISForEdges[i]; 2083 } else { 2084 used_is = ISForFaces[i-n_ISForEdges]; 2085 } 2086 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2087 total_counts += j; 2088 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2089 } 2090 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2091 2092 /* get local part of global near null space vectors */ 2093 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2094 for (k=0;k<nnsp_size;k++) { 2095 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2096 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2097 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2098 } 2099 2100 /* whether or not to skip lapack calls */ 2101 skip_lapack = PETSC_TRUE; 2102 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2103 2104 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2105 if (!skip_lapack) { 2106 PetscScalar temp_work; 2107 2108 #if defined(PETSC_MISSING_LAPACK_GESVD) 2109 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2110 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2111 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2112 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2113 #if defined(PETSC_USE_COMPLEX) 2114 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2115 #endif 2116 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2117 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2118 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2119 lwork = -1; 2120 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2121 #if !defined(PETSC_USE_COMPLEX) 2122 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2123 #else 2124 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2125 #endif 2126 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2127 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2128 #else /* on missing GESVD */ 2129 /* SVD */ 2130 PetscInt max_n,min_n; 2131 max_n = max_size_of_constraint; 2132 min_n = max_constraints; 2133 if (max_size_of_constraint < max_constraints) { 2134 min_n = max_size_of_constraint; 2135 max_n = max_constraints; 2136 } 2137 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2138 #if defined(PETSC_USE_COMPLEX) 2139 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2140 #endif 2141 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2142 lwork = -1; 2143 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2144 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2145 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2146 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2147 #if !defined(PETSC_USE_COMPLEX) 2148 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2149 #else 2150 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2151 #endif 2152 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2153 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2154 #endif /* on missing GESVD */ 2155 /* Allocate optimal workspace */ 2156 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2157 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2158 } 2159 /* Now we can loop on constraining sets */ 2160 total_counts = 0; 2161 constraints_idxs_ptr[0] = 0; 2162 constraints_data_ptr[0] = 0; 2163 /* vertices */ 2164 if (n_vertices) { 2165 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2166 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2167 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2168 for (i=0;i<n_vertices;i++) { 2169 constraints_n[total_counts] = 1; 2170 constraints_data[total_counts] = 1.0; 2171 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2172 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2173 total_counts++; 2174 } 2175 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2176 PetscBool used_vertex; 2177 for (i=0;i<n_vertices;i++) { 2178 used_vertex = PETSC_FALSE; 2179 k = 0; 2180 while (!used_vertex && k<nnsp_size) { 2181 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2182 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2183 constraints_n[total_counts] = 1; 2184 constraints_idxs[total_counts] = is_indices[i]; 2185 constraints_data[total_counts] = 1.0; 2186 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2187 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2188 total_counts++; 2189 used_vertex = PETSC_TRUE; 2190 } 2191 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2192 k++; 2193 } 2194 } 2195 } 2196 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2197 n_vertices = total_counts; 2198 } 2199 2200 /* edges and faces */ 2201 total_counts_cc = total_counts; 2202 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2203 IS used_is; 2204 PetscBool idxs_copied = PETSC_FALSE; 2205 2206 if (ncc<n_ISForEdges) { 2207 used_is = ISForEdges[ncc]; 2208 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2209 } else { 2210 used_is = ISForFaces[ncc-n_ISForEdges]; 2211 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2212 } 2213 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2214 2215 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2216 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2217 /* change of basis should not be performed on local periodic nodes */ 2218 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2219 if (nnsp_has_cnst) { 2220 PetscScalar quad_value; 2221 2222 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2223 idxs_copied = PETSC_TRUE; 2224 2225 if (!pcbddc->use_nnsp_true) { 2226 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2227 } else { 2228 quad_value = 1.0; 2229 } 2230 for (j=0;j<size_of_constraint;j++) { 2231 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2232 } 2233 temp_constraints++; 2234 total_counts++; 2235 } 2236 for (k=0;k<nnsp_size;k++) { 2237 PetscReal real_value; 2238 PetscScalar *ptr_to_data; 2239 2240 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2241 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2242 for (j=0;j<size_of_constraint;j++) { 2243 ptr_to_data[j] = array[is_indices[j]]; 2244 } 2245 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2246 /* check if array is null on the connected component */ 2247 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2248 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2249 if (real_value > 0.0) { /* keep indices and values */ 2250 temp_constraints++; 2251 total_counts++; 2252 if (!idxs_copied) { 2253 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2254 idxs_copied = PETSC_TRUE; 2255 } 2256 } 2257 } 2258 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2259 valid_constraints = temp_constraints; 2260 if (!pcbddc->use_nnsp_true && temp_constraints) { 2261 if (temp_constraints == 1) { /* just normalize the constraint */ 2262 PetscScalar norm,*ptr_to_data; 2263 2264 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2265 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2266 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2267 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2268 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2269 } else { /* perform SVD */ 2270 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2271 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2272 2273 #if defined(PETSC_MISSING_LAPACK_GESVD) 2274 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2275 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2276 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2277 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2278 from that computed using LAPACKgesvd 2279 -> This is due to a different computation of eigenvectors in LAPACKheev 2280 -> The quality of the POD-computed basis will be the same */ 2281 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2282 /* Store upper triangular part of correlation matrix */ 2283 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2284 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2285 for (j=0;j<temp_constraints;j++) { 2286 for (k=0;k<j+1;k++) { 2287 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2288 } 2289 } 2290 /* compute eigenvalues and eigenvectors of correlation matrix */ 2291 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2292 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2293 #if !defined(PETSC_USE_COMPLEX) 2294 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2295 #else 2296 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2297 #endif 2298 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2299 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2300 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2301 j = 0; 2302 while (j < temp_constraints && singular_vals[j] < tol) j++; 2303 total_counts = total_counts-j; 2304 valid_constraints = temp_constraints-j; 2305 /* scale and copy POD basis into used quadrature memory */ 2306 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2307 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2308 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2309 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2310 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2311 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2312 if (j<temp_constraints) { 2313 PetscInt ii; 2314 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2315 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2316 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2317 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2318 for (k=0;k<temp_constraints-j;k++) { 2319 for (ii=0;ii<size_of_constraint;ii++) { 2320 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2321 } 2322 } 2323 } 2324 #else /* on missing GESVD */ 2325 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2326 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2327 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2328 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2329 #if !defined(PETSC_USE_COMPLEX) 2330 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2331 #else 2332 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2333 #endif 2334 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2335 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2336 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2337 k = temp_constraints; 2338 if (k > size_of_constraint) k = size_of_constraint; 2339 j = 0; 2340 while (j < k && singular_vals[k-j-1] < tol) j++; 2341 valid_constraints = k-j; 2342 total_counts = total_counts-temp_constraints+valid_constraints; 2343 #endif /* on missing GESVD */ 2344 } 2345 } 2346 /* update pointers information */ 2347 if (valid_constraints) { 2348 constraints_n[total_counts_cc] = valid_constraints; 2349 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2350 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2351 /* set change_of_basis flag */ 2352 if (boolforchange) { 2353 PetscBTSet(change_basis,total_counts_cc); 2354 } 2355 total_counts_cc++; 2356 } 2357 } 2358 /* free workspace */ 2359 if (!skip_lapack) { 2360 ierr = PetscFree(work);CHKERRQ(ierr); 2361 #if defined(PETSC_USE_COMPLEX) 2362 ierr = PetscFree(rwork);CHKERRQ(ierr); 2363 #endif 2364 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2365 #if defined(PETSC_MISSING_LAPACK_GESVD) 2366 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2367 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2368 #endif 2369 } 2370 for (k=0;k<nnsp_size;k++) { 2371 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2372 } 2373 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2374 /* free index sets of faces, edges and vertices */ 2375 for (i=0;i<n_ISForFaces;i++) { 2376 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2377 } 2378 if (n_ISForFaces) { 2379 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2380 } 2381 for (i=0;i<n_ISForEdges;i++) { 2382 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2383 } 2384 if (n_ISForEdges) { 2385 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2386 } 2387 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2388 } else { 2389 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2390 2391 total_counts = 0; 2392 n_vertices = 0; 2393 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2394 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2395 } 2396 max_constraints = 0; 2397 total_counts_cc = 0; 2398 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2399 total_counts += pcbddc->adaptive_constraints_n[i]; 2400 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2401 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2402 } 2403 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2404 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2405 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2406 constraints_data = pcbddc->adaptive_constraints_data; 2407 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2408 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2409 total_counts_cc = 0; 2410 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2411 if (pcbddc->adaptive_constraints_n[i]) { 2412 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2413 } 2414 } 2415 #if 0 2416 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2417 for (i=0;i<total_counts_cc;i++) { 2418 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2419 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2420 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2421 printf(" %d",constraints_idxs[j]); 2422 } 2423 printf("\n"); 2424 printf("number of cc: %d\n",constraints_n[i]); 2425 } 2426 for (i=0;i<n_vertices;i++) { 2427 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2428 } 2429 for (i=0;i<sub_schurs->n_subs;i++) { 2430 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2431 } 2432 #endif 2433 2434 max_size_of_constraint = 0; 2435 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2436 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2437 /* Change of basis */ 2438 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2439 if (pcbddc->use_change_of_basis) { 2440 for (i=0;i<sub_schurs->n_subs;i++) { 2441 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2442 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2443 } 2444 } 2445 } 2446 } 2447 pcbddc->local_primal_size = total_counts; 2448 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2449 2450 /* map constraints_idxs in boundary numbering */ 2451 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2452 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 2453 2454 /* Create constraint matrix */ 2455 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2456 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2457 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2458 2459 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2460 /* determine if a QR strategy is needed for change of basis */ 2461 qr_needed = PETSC_FALSE; 2462 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2463 total_primal_vertices=0; 2464 pcbddc->local_primal_size_cc = 0; 2465 for (i=0;i<total_counts_cc;i++) { 2466 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2467 if (size_of_constraint == 1) { 2468 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2469 pcbddc->local_primal_size_cc += 1; 2470 } else if (PetscBTLookup(change_basis,i)) { 2471 for (k=0;k<constraints_n[i];k++) { 2472 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2473 } 2474 pcbddc->local_primal_size_cc += constraints_n[i]; 2475 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2476 PetscBTSet(qr_needed_idx,i); 2477 qr_needed = PETSC_TRUE; 2478 } 2479 } else { 2480 pcbddc->local_primal_size_cc += 1; 2481 } 2482 } 2483 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2484 pcbddc->n_vertices = total_primal_vertices; 2485 /* permute indices in order to have a sorted set of vertices */ 2486 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2487 2488 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2489 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2490 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2491 2492 /* nonzero structure of constraint matrix */ 2493 /* and get reference dof for local constraints */ 2494 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2495 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2496 2497 j = total_primal_vertices; 2498 total_counts = total_primal_vertices; 2499 cum = total_primal_vertices; 2500 for (i=n_vertices;i<total_counts_cc;i++) { 2501 if (!PetscBTLookup(change_basis,i)) { 2502 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2503 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2504 cum++; 2505 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2506 for (k=0;k<constraints_n[i];k++) { 2507 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2508 nnz[j+k] = size_of_constraint; 2509 } 2510 j += constraints_n[i]; 2511 } 2512 } 2513 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2514 ierr = PetscFree(nnz);CHKERRQ(ierr); 2515 2516 /* set values in constraint matrix */ 2517 for (i=0;i<total_primal_vertices;i++) { 2518 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2519 } 2520 total_counts = total_primal_vertices; 2521 for (i=n_vertices;i<total_counts_cc;i++) { 2522 if (!PetscBTLookup(change_basis,i)) { 2523 PetscInt *cols; 2524 2525 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2526 cols = constraints_idxs+constraints_idxs_ptr[i]; 2527 for (k=0;k<constraints_n[i];k++) { 2528 PetscInt row = total_counts+k; 2529 PetscScalar *vals; 2530 2531 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2532 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2533 } 2534 total_counts += constraints_n[i]; 2535 } 2536 } 2537 /* assembling */ 2538 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2539 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2540 2541 /* 2542 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2543 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2544 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 2545 */ 2546 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2547 if (pcbddc->use_change_of_basis) { 2548 /* dual and primal dofs on a single cc */ 2549 PetscInt dual_dofs,primal_dofs; 2550 /* working stuff for GEQRF */ 2551 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2552 PetscBLASInt lqr_work; 2553 /* working stuff for UNGQR */ 2554 PetscScalar *gqr_work,lgqr_work_t; 2555 PetscBLASInt lgqr_work; 2556 /* working stuff for TRTRS */ 2557 PetscScalar *trs_rhs; 2558 PetscBLASInt Blas_NRHS; 2559 /* pointers for values insertion into change of basis matrix */ 2560 PetscInt *start_rows,*start_cols; 2561 PetscScalar *start_vals; 2562 /* working stuff for values insertion */ 2563 PetscBT is_primal; 2564 PetscInt *aux_primal_numbering_B; 2565 /* matrix sizes */ 2566 PetscInt global_size,local_size; 2567 /* temporary change of basis */ 2568 Mat localChangeOfBasisMatrix; 2569 /* extra space for debugging */ 2570 PetscScalar *dbg_work; 2571 2572 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2573 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2574 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2575 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2576 /* nonzeros for local mat */ 2577 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2578 for (i=0;i<pcis->n;i++) nnz[i]=1; 2579 for (i=n_vertices;i<total_counts_cc;i++) { 2580 if (PetscBTLookup(change_basis,i)) { 2581 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2582 if (PetscBTLookup(qr_needed_idx,i)) { 2583 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2584 } else { 2585 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2586 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2587 } 2588 } 2589 } 2590 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2591 ierr = PetscFree(nnz);CHKERRQ(ierr); 2592 /* Set initial identity in the matrix */ 2593 for (i=0;i<pcis->n;i++) { 2594 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2595 } 2596 2597 if (pcbddc->dbg_flag) { 2598 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2600 } 2601 2602 2603 /* Now we loop on the constraints which need a change of basis */ 2604 /* 2605 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2606 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2607 2608 Basic blocks of change of basis matrix T computed by 2609 2610 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2611 2612 | 1 0 ... 0 s_1/S | 2613 | 0 1 ... 0 s_2/S | 2614 | ... | 2615 | 0 ... 1 s_{n-1}/S | 2616 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2617 2618 with S = \sum_{i=1}^n s_i^2 2619 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2620 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2621 2622 - QR decomposition of constraints otherwise 2623 */ 2624 if (qr_needed) { 2625 /* space to store Q */ 2626 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2627 /* first we issue queries for optimal work */ 2628 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2629 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2630 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2631 lqr_work = -1; 2632 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2633 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2634 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2635 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2636 lgqr_work = -1; 2637 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2638 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2639 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2640 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2641 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2642 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2643 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2644 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2645 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2646 /* array to store scaling factors for reflectors */ 2647 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2648 /* array to store rhs and solution of triangular solver */ 2649 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2650 /* allocating workspace for check */ 2651 if (pcbddc->dbg_flag) { 2652 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2653 } 2654 } 2655 /* array to store whether a node is primal or not */ 2656 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2657 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2658 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2659 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 2660 for (i=0;i<total_primal_vertices;i++) { 2661 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2662 } 2663 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2664 2665 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2666 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2667 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2668 if (PetscBTLookup(change_basis,total_counts)) { 2669 /* get constraint info */ 2670 primal_dofs = constraints_n[total_counts]; 2671 dual_dofs = size_of_constraint-primal_dofs; 2672 2673 if (pcbddc->dbg_flag) { 2674 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2675 } 2676 2677 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2678 2679 /* copy quadrature constraints for change of basis check */ 2680 if (pcbddc->dbg_flag) { 2681 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2682 } 2683 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2684 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2685 2686 /* compute QR decomposition of constraints */ 2687 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2688 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2689 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2690 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2691 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2692 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2693 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2694 2695 /* explictly compute R^-T */ 2696 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2697 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2698 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2699 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2700 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2701 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2702 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2703 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2704 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2705 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2706 2707 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2708 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2709 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2710 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2711 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2712 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2713 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2714 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2715 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2716 2717 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2718 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2719 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2720 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2721 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2722 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2723 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2724 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2725 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2726 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2727 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2728 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2729 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2730 2731 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2732 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2733 /* insert cols for primal dofs */ 2734 for (j=0;j<primal_dofs;j++) { 2735 start_vals = &qr_basis[j*size_of_constraint]; 2736 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2737 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2738 } 2739 /* insert cols for dual dofs */ 2740 for (j=0,k=0;j<dual_dofs;k++) { 2741 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2742 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2743 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2744 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2745 j++; 2746 } 2747 } 2748 2749 /* check change of basis */ 2750 if (pcbddc->dbg_flag) { 2751 PetscInt ii,jj; 2752 PetscBool valid_qr=PETSC_TRUE; 2753 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2754 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2755 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2756 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2757 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2758 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2759 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2760 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2761 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2762 for (jj=0;jj<size_of_constraint;jj++) { 2763 for (ii=0;ii<primal_dofs;ii++) { 2764 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2765 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2766 } 2767 } 2768 if (!valid_qr) { 2769 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2770 for (jj=0;jj<size_of_constraint;jj++) { 2771 for (ii=0;ii<primal_dofs;ii++) { 2772 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2773 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2774 } 2775 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2776 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2777 } 2778 } 2779 } 2780 } else { 2781 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2782 } 2783 } 2784 } else { /* simple transformation block */ 2785 PetscInt row,col; 2786 PetscScalar val,norm; 2787 2788 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2789 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2790 for (j=0;j<size_of_constraint;j++) { 2791 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2792 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2793 if (!PetscBTLookup(is_primal,row_B)) { 2794 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2795 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2796 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2797 } else { 2798 for (k=0;k<size_of_constraint;k++) { 2799 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2800 if (row != col) { 2801 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2802 } else { 2803 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2804 } 2805 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2806 } 2807 } 2808 } 2809 if (pcbddc->dbg_flag) { 2810 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2811 } 2812 } 2813 } else { 2814 if (pcbddc->dbg_flag) { 2815 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2816 } 2817 } 2818 } 2819 2820 /* free workspace */ 2821 if (qr_needed) { 2822 if (pcbddc->dbg_flag) { 2823 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2824 } 2825 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2826 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2827 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2828 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2829 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2830 } 2831 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2832 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2833 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2834 2835 /* assembling of global change of variable */ 2836 { 2837 Mat tmat; 2838 PetscInt bs; 2839 2840 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2841 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2842 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2843 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2844 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2845 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2846 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2847 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2848 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2849 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2850 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2851 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2852 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2853 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2854 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2855 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2856 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2857 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2858 } 2859 /* check */ 2860 if (pcbddc->dbg_flag) { 2861 PetscReal error; 2862 Vec x,x_change; 2863 2864 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2865 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2866 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2867 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2868 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2869 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2870 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2871 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2872 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2873 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2874 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2875 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2876 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2877 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2878 ierr = VecDestroy(&x);CHKERRQ(ierr); 2879 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2880 } 2881 2882 /* adapt sub_schurs computed (if any) */ 2883 if (pcbddc->use_deluxe_scaling) { 2884 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2885 if (sub_schurs->S_Ej_all) { 2886 Mat S_new,tmat; 2887 IS is_all_N; 2888 2889 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2890 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2891 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2892 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2893 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2894 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2895 sub_schurs->S_Ej_all = S_new; 2896 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2897 if (sub_schurs->sum_S_Ej_all) { 2898 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2899 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2900 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2901 sub_schurs->sum_S_Ej_all = S_new; 2902 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2903 } 2904 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2905 } 2906 } 2907 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2908 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2909 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2910 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2911 } 2912 2913 /* set up change of basis context */ 2914 if (pcbddc->ChangeOfBasisMatrix) { 2915 PCBDDCChange_ctx change_ctx; 2916 2917 if (!pcbddc->new_global_mat) { 2918 PetscInt global_size,local_size; 2919 2920 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2921 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2922 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2923 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2924 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2925 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2926 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2927 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2928 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2929 } else { 2930 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2931 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2932 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2933 } 2934 if (!pcbddc->user_ChangeOfBasisMatrix) { 2935 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2936 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2937 } else { 2938 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2939 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2940 } 2941 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2942 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2943 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2944 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2945 } 2946 2947 /* check if a new primal space has been introduced */ 2948 pcbddc->new_primal_space_local = PETSC_TRUE; 2949 if (olocal_primal_size == pcbddc->local_primal_size) { 2950 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2951 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2952 if (!pcbddc->new_primal_space_local) { 2953 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2954 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2955 } 2956 } 2957 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2958 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2959 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2960 2961 /* flush dbg viewer */ 2962 if (pcbddc->dbg_flag) { 2963 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2964 } 2965 2966 /* free workspace */ 2967 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2968 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2969 if (!pcbddc->adaptive_selection) { 2970 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2971 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2972 } else { 2973 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2974 pcbddc->adaptive_constraints_idxs_ptr, 2975 pcbddc->adaptive_constraints_data_ptr, 2976 pcbddc->adaptive_constraints_idxs, 2977 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2978 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2979 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2980 } 2981 PetscFunctionReturn(0); 2982 } 2983 2984 #undef __FUNCT__ 2985 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2986 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2987 { 2988 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2989 PC_IS *pcis = (PC_IS*)pc->data; 2990 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2991 PetscInt ierr,i,vertex_size,N; 2992 PetscViewer viewer=pcbddc->dbg_viewer; 2993 2994 PetscFunctionBegin; 2995 /* Reset previously computed graph */ 2996 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2997 /* Init local Graph struct */ 2998 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2999 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3000 3001 /* Check validity of the csr graph passed in by the user */ 3002 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3003 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3004 } 3005 3006 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3007 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3008 PetscInt *xadj,*adjncy; 3009 PetscInt nvtxs; 3010 PetscBool flg_row=PETSC_FALSE; 3011 3012 if (pcbddc->use_local_adj) { 3013 3014 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3015 if (flg_row) { 3016 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3017 pcbddc->computed_rowadj = PETSC_TRUE; 3018 } 3019 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3020 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3021 IS is_dummy; 3022 ISLocalToGlobalMapping l2gmap_dummy; 3023 PetscInt j,sum; 3024 PetscInt *cxadj,*cadjncy; 3025 const PetscInt *idxs; 3026 PCBDDCGraph graph; 3027 PetscBT is_on_boundary; 3028 3029 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3030 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3031 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3032 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3033 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3034 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3035 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3036 if (flg_row) { 3037 graph->xadj = xadj; 3038 graph->adjncy = adjncy; 3039 } 3040 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3041 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3042 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3043 3044 if (pcbddc->dbg_flag) { 3045 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3046 for (i=0;i<graph->ncc;i++) { 3047 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3048 } 3049 } 3050 3051 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3052 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3053 for (i=0;i<pcis->n_B;i++) { 3054 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3055 } 3056 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3057 3058 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3059 sum = 0; 3060 for (i=0;i<graph->ncc;i++) { 3061 PetscInt sizecc = 0; 3062 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3063 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3064 sizecc++; 3065 } 3066 } 3067 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3068 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3069 cxadj[graph->queue[j]] = sizecc; 3070 } 3071 } 3072 sum += sizecc*sizecc; 3073 } 3074 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3075 sum = 0; 3076 for (i=0;i<pcis->n;i++) { 3077 PetscInt temp = cxadj[i]; 3078 cxadj[i] = sum; 3079 sum += temp; 3080 } 3081 cxadj[pcis->n] = sum; 3082 for (i=0;i<graph->ncc;i++) { 3083 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3084 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3085 PetscInt k,sizecc = 0; 3086 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3087 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3088 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3089 sizecc++; 3090 } 3091 } 3092 } 3093 } 3094 } 3095 if (sum) { 3096 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3097 } else { 3098 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3099 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3100 } 3101 graph->xadj = 0; 3102 graph->adjncy = 0; 3103 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3104 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3105 } 3106 } 3107 if (pcbddc->dbg_flag) { 3108 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3109 } 3110 3111 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3112 vertex_size = 1; 3113 if (pcbddc->user_provided_isfordofs) { 3114 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3115 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3116 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3117 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3118 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3119 } 3120 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3121 pcbddc->n_ISForDofs = 0; 3122 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3123 } 3124 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3125 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3126 } else { 3127 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3128 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3129 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3130 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3131 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3132 } 3133 } 3134 } 3135 3136 /* Setup of Graph */ 3137 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3138 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3139 } 3140 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3141 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3142 } 3143 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3144 3145 /* Graph's connected components analysis */ 3146 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3147 3148 /* print some info to stdout */ 3149 if (pcbddc->dbg_flag) { 3150 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3151 } 3152 3153 /* mark topography has done */ 3154 pcbddc->recompute_topography = PETSC_FALSE; 3155 PetscFunctionReturn(0); 3156 } 3157 3158 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3159 #undef __FUNCT__ 3160 #define __FUNCT__ "PCBDDCSubsetNumbering" 3161 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3162 { 3163 PetscSF sf; 3164 PetscLayout map; 3165 const PetscInt *idxs; 3166 PetscInt *leaf_data,*root_data,*gidxs; 3167 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3168 PetscInt n_n,nlocals,start,first_index; 3169 PetscMPIInt commsize; 3170 PetscBool first_found; 3171 PetscErrorCode ierr; 3172 3173 PetscFunctionBegin; 3174 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3175 if (subset_mult) { 3176 PetscCheckSameComm(subset,1,subset_mult,2); 3177 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3178 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3179 } 3180 /* create workspace layout for computing global indices of subset */ 3181 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3182 lbounds[0] = lbounds[1] = 0; 3183 for (i=0;i<n;i++) { 3184 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3185 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3186 } 3187 lbounds[0] = -lbounds[0]; 3188 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3189 gbounds[0] = -gbounds[0]; 3190 N = gbounds[1] - gbounds[0] + 1; 3191 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3192 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3193 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3194 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3195 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3196 3197 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3198 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3199 if (subset_mult) { 3200 const PetscInt* idxs_mult; 3201 3202 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3203 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3204 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3205 } else { 3206 for (i=0;i<n;i++) leaf_data[i] = 1; 3207 } 3208 /* local size of new subset */ 3209 n_n = 0; 3210 for (i=0;i<n;i++) n_n += leaf_data[i]; 3211 3212 /* global indexes in layout */ 3213 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3214 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3215 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3216 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3217 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3218 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3219 3220 /* reduce from leaves to roots */ 3221 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3222 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3223 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3224 3225 /* count indexes in local part of layout */ 3226 nlocals = 0; 3227 first_index = -1; 3228 first_found = PETSC_FALSE; 3229 for (i=0;i<Nl;i++) { 3230 if (!first_found && root_data[i]) { 3231 first_found = PETSC_TRUE; 3232 first_index = i; 3233 } 3234 nlocals += root_data[i]; 3235 } 3236 3237 /* cumulative of number of indexes and size of subset without holes */ 3238 #if defined(PETSC_HAVE_MPI_EXSCAN) 3239 start = 0; 3240 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3241 #else 3242 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3243 start = start-nlocals; 3244 #endif 3245 3246 if (N_n) { /* compute total size of new subset if requested */ 3247 *N_n = start + nlocals; 3248 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3249 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3250 } 3251 3252 /* adapt root data with cumulative */ 3253 if (first_found) { 3254 PetscInt old_index; 3255 3256 root_data[first_index] += start; 3257 old_index = first_index; 3258 for (i=first_index+1;i<Nl;i++) { 3259 if (root_data[i]) { 3260 root_data[i] += root_data[old_index]; 3261 old_index = i; 3262 } 3263 } 3264 } 3265 3266 /* from roots to leaves */ 3267 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3268 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3269 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3270 3271 /* create new IS with global indexes without holes */ 3272 if (subset_mult) { 3273 const PetscInt* idxs_mult; 3274 PetscInt cum; 3275 3276 cum = 0; 3277 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3278 for (i=0;i<n;i++) { 3279 PetscInt j; 3280 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3281 } 3282 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3283 } else { 3284 for (i=0;i<n;i++) { 3285 gidxs[i] = leaf_data[i]-1; 3286 } 3287 } 3288 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3289 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3290 PetscFunctionReturn(0); 3291 } 3292 3293 #undef __FUNCT__ 3294 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3295 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3296 { 3297 PetscInt i,j; 3298 PetscScalar *alphas; 3299 PetscErrorCode ierr; 3300 3301 PetscFunctionBegin; 3302 /* this implements stabilized Gram-Schmidt */ 3303 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3304 for (i=0;i<n;i++) { 3305 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3306 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3307 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3308 } 3309 ierr = PetscFree(alphas);CHKERRQ(ierr); 3310 PetscFunctionReturn(0); 3311 } 3312 3313 #undef __FUNCT__ 3314 #define __FUNCT__ "MatISGetSubassemblingPattern" 3315 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3316 { 3317 IS ranks_send_to; 3318 PetscInt n_neighs,*neighs,*n_shared,**shared; 3319 PetscMPIInt size,rank,color; 3320 PetscInt *xadj,*adjncy; 3321 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3322 PetscInt i,local_size,threshold=0; 3323 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3324 PetscSubcomm subcomm; 3325 PetscErrorCode ierr; 3326 3327 PetscFunctionBegin; 3328 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3329 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3330 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3331 3332 /* Get info on mapping */ 3333 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3334 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3335 3336 /* build local CSR graph of subdomains' connectivity */ 3337 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3338 xadj[0] = 0; 3339 xadj[1] = PetscMax(n_neighs-1,0); 3340 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3341 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3342 3343 if (threshold) { 3344 PetscInt xadj_count = 0; 3345 for (i=1;i<n_neighs;i++) { 3346 if (n_shared[i] > threshold) { 3347 adjncy[xadj_count] = neighs[i]; 3348 adjncy_wgt[xadj_count] = n_shared[i]; 3349 xadj_count++; 3350 } 3351 } 3352 xadj[1] = xadj_count; 3353 } else { 3354 if (xadj[1]) { 3355 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3356 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3357 } 3358 } 3359 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3360 if (use_square) { 3361 for (i=0;i<xadj[1];i++) { 3362 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3363 } 3364 } 3365 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3366 3367 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3368 3369 /* 3370 Restrict work on active processes only. 3371 */ 3372 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3373 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3374 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3375 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3376 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3377 if (color) { 3378 ierr = PetscFree(xadj);CHKERRQ(ierr); 3379 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3380 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3381 } else { 3382 Mat subdomain_adj; 3383 IS new_ranks,new_ranks_contig; 3384 MatPartitioning partitioner; 3385 PetscInt prank,rstart=0,rend=0; 3386 PetscInt *is_indices,*oldranks; 3387 PetscBool aggregate; 3388 3389 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3390 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3391 prank = rank; 3392 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3393 /* 3394 for (i=0;i<size;i++) { 3395 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3396 } 3397 */ 3398 for (i=0;i<xadj[1];i++) { 3399 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3400 } 3401 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3402 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3403 if (aggregate) { 3404 PetscInt lrows,row,ncols,*cols; 3405 PetscMPIInt nrank; 3406 PetscScalar *vals; 3407 3408 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3409 lrows = 0; 3410 if (nrank<redprocs) { 3411 lrows = size/redprocs; 3412 if (nrank<size%redprocs) lrows++; 3413 } 3414 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3415 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3416 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3417 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3418 row = nrank; 3419 ncols = xadj[1]-xadj[0]; 3420 cols = adjncy; 3421 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3422 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3423 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3424 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3425 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3426 ierr = PetscFree(xadj);CHKERRQ(ierr); 3427 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3428 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3429 ierr = PetscFree(vals);CHKERRQ(ierr); 3430 } else { 3431 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3432 } 3433 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3434 3435 /* Partition */ 3436 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3437 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3438 if (use_vwgt) { 3439 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3440 v_wgt[0] = local_size; 3441 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3442 } 3443 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3444 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3445 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3446 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3447 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3448 3449 /* renumber new_ranks to avoid "holes" in new set of processors */ 3450 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3451 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3452 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3453 if (!redprocs) { 3454 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3455 } else { 3456 PetscInt idxs[1]; 3457 PetscMPIInt tag; 3458 MPI_Request *reqs; 3459 3460 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3461 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3462 for (i=rstart;i<rend;i++) { 3463 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3464 } 3465 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3466 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3467 ierr = PetscFree(reqs);CHKERRQ(ierr); 3468 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3469 } 3470 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3471 /* clean up */ 3472 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3473 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3474 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3475 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3476 } 3477 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3478 3479 /* assemble parallel IS for sends */ 3480 i = 1; 3481 if (color) i=0; 3482 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3483 /* get back IS */ 3484 *is_sends = ranks_send_to; 3485 PetscFunctionReturn(0); 3486 } 3487 3488 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3489 3490 #undef __FUNCT__ 3491 #define __FUNCT__ "MatISSubassemble" 3492 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3493 { 3494 Mat local_mat; 3495 IS is_sends_internal; 3496 PetscInt rows,cols,new_local_rows; 3497 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3498 PetscBool ismatis,isdense,newisdense,destroy_mat; 3499 ISLocalToGlobalMapping l2gmap; 3500 PetscInt* l2gmap_indices; 3501 const PetscInt* is_indices; 3502 MatType new_local_type; 3503 /* buffers */ 3504 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3505 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3506 PetscInt *recv_buffer_idxs_local; 3507 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3508 /* MPI */ 3509 MPI_Comm comm,comm_n; 3510 PetscSubcomm subcomm; 3511 PetscMPIInt n_sends,n_recvs,commsize; 3512 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3513 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3514 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3515 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3516 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3517 PetscErrorCode ierr; 3518 3519 PetscFunctionBegin; 3520 /* TODO: add missing checks */ 3521 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3522 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3523 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3524 PetscValidLogicalCollectiveInt(mat,nis,7); 3525 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3526 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3527 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3528 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3529 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3530 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3531 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3532 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3533 PetscInt mrows,mcols,mnrows,mncols; 3534 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3535 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3536 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3537 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3538 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3539 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3540 } 3541 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3542 PetscValidLogicalCollectiveInt(mat,bs,0); 3543 /* prepare IS for sending if not provided */ 3544 if (!is_sends) { 3545 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3546 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3547 } else { 3548 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3549 is_sends_internal = is_sends; 3550 } 3551 3552 /* get comm */ 3553 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3554 3555 /* compute number of sends */ 3556 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3557 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3558 3559 /* compute number of receives */ 3560 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3561 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3562 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3563 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3564 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3565 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3566 ierr = PetscFree(iflags);CHKERRQ(ierr); 3567 3568 /* restrict comm if requested */ 3569 subcomm = 0; 3570 destroy_mat = PETSC_FALSE; 3571 if (restrict_comm) { 3572 PetscMPIInt color,subcommsize; 3573 3574 color = 0; 3575 if (restrict_full) { 3576 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3577 } else { 3578 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3579 } 3580 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3581 subcommsize = commsize - subcommsize; 3582 /* check if reuse has been requested */ 3583 if (reuse == MAT_REUSE_MATRIX) { 3584 if (*mat_n) { 3585 PetscMPIInt subcommsize2; 3586 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3587 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3588 comm_n = PetscObjectComm((PetscObject)*mat_n); 3589 } else { 3590 comm_n = PETSC_COMM_SELF; 3591 } 3592 } else { /* MAT_INITIAL_MATRIX */ 3593 PetscMPIInt rank; 3594 3595 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3596 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3597 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3598 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3599 comm_n = PetscSubcommChild(subcomm); 3600 } 3601 /* flag to destroy *mat_n if not significative */ 3602 if (color) destroy_mat = PETSC_TRUE; 3603 } else { 3604 comm_n = comm; 3605 } 3606 3607 /* prepare send/receive buffers */ 3608 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3609 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3610 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3611 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3612 if (nis) { 3613 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3614 } 3615 3616 /* Get data from local matrices */ 3617 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3618 /* TODO: See below some guidelines on how to prepare the local buffers */ 3619 /* 3620 send_buffer_vals should contain the raw values of the local matrix 3621 send_buffer_idxs should contain: 3622 - MatType_PRIVATE type 3623 - PetscInt size_of_l2gmap 3624 - PetscInt global_row_indices[size_of_l2gmap] 3625 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3626 */ 3627 else { 3628 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3629 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3630 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3631 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3632 send_buffer_idxs[1] = i; 3633 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3634 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3635 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3636 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3637 for (i=0;i<n_sends;i++) { 3638 ilengths_vals[is_indices[i]] = len*len; 3639 ilengths_idxs[is_indices[i]] = len+2; 3640 } 3641 } 3642 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3643 /* additional is (if any) */ 3644 if (nis) { 3645 PetscMPIInt psum; 3646 PetscInt j; 3647 for (j=0,psum=0;j<nis;j++) { 3648 PetscInt plen; 3649 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3650 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3651 psum += len+1; /* indices + lenght */ 3652 } 3653 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3654 for (j=0,psum=0;j<nis;j++) { 3655 PetscInt plen; 3656 const PetscInt *is_array_idxs; 3657 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3658 send_buffer_idxs_is[psum] = plen; 3659 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3660 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3661 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3662 psum += plen+1; /* indices + lenght */ 3663 } 3664 for (i=0;i<n_sends;i++) { 3665 ilengths_idxs_is[is_indices[i]] = psum; 3666 } 3667 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3668 } 3669 3670 buf_size_idxs = 0; 3671 buf_size_vals = 0; 3672 buf_size_idxs_is = 0; 3673 for (i=0;i<n_recvs;i++) { 3674 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3675 buf_size_vals += (PetscInt)olengths_vals[i]; 3676 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3677 } 3678 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3679 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3680 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3681 3682 /* get new tags for clean communications */ 3683 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3684 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3685 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3686 3687 /* allocate for requests */ 3688 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3689 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3690 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3691 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3692 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3693 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3694 3695 /* communications */ 3696 ptr_idxs = recv_buffer_idxs; 3697 ptr_vals = recv_buffer_vals; 3698 ptr_idxs_is = recv_buffer_idxs_is; 3699 for (i=0;i<n_recvs;i++) { 3700 source_dest = onodes[i]; 3701 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3702 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3703 ptr_idxs += olengths_idxs[i]; 3704 ptr_vals += olengths_vals[i]; 3705 if (nis) { 3706 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3707 ptr_idxs_is += olengths_idxs_is[i]; 3708 } 3709 } 3710 for (i=0;i<n_sends;i++) { 3711 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3712 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3713 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3714 if (nis) { 3715 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3716 } 3717 } 3718 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3719 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3720 3721 /* assemble new l2g map */ 3722 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3723 ptr_idxs = recv_buffer_idxs; 3724 new_local_rows = 0; 3725 for (i=0;i<n_recvs;i++) { 3726 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3727 ptr_idxs += olengths_idxs[i]; 3728 } 3729 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3730 ptr_idxs = recv_buffer_idxs; 3731 new_local_rows = 0; 3732 for (i=0;i<n_recvs;i++) { 3733 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3734 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3735 ptr_idxs += olengths_idxs[i]; 3736 } 3737 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3738 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3739 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3740 3741 /* infer new local matrix type from received local matrices type */ 3742 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3743 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3744 if (n_recvs) { 3745 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3746 ptr_idxs = recv_buffer_idxs; 3747 for (i=0;i<n_recvs;i++) { 3748 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3749 new_local_type_private = MATAIJ_PRIVATE; 3750 break; 3751 } 3752 ptr_idxs += olengths_idxs[i]; 3753 } 3754 switch (new_local_type_private) { 3755 case MATDENSE_PRIVATE: 3756 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3757 new_local_type = MATSEQAIJ; 3758 bs = 1; 3759 } else { /* if I receive only 1 dense matrix */ 3760 new_local_type = MATSEQDENSE; 3761 bs = 1; 3762 } 3763 break; 3764 case MATAIJ_PRIVATE: 3765 new_local_type = MATSEQAIJ; 3766 bs = 1; 3767 break; 3768 case MATBAIJ_PRIVATE: 3769 new_local_type = MATSEQBAIJ; 3770 break; 3771 case MATSBAIJ_PRIVATE: 3772 new_local_type = MATSEQSBAIJ; 3773 break; 3774 default: 3775 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3776 break; 3777 } 3778 } else { /* by default, new_local_type is seqdense */ 3779 new_local_type = MATSEQDENSE; 3780 bs = 1; 3781 } 3782 3783 /* create MATIS object if needed */ 3784 if (reuse == MAT_INITIAL_MATRIX) { 3785 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3786 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3787 } else { 3788 /* it also destroys the local matrices */ 3789 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3790 } 3791 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3792 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3793 3794 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3795 3796 /* Global to local map of received indices */ 3797 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3798 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3799 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3800 3801 /* restore attributes -> type of incoming data and its size */ 3802 buf_size_idxs = 0; 3803 for (i=0;i<n_recvs;i++) { 3804 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3805 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3806 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3807 } 3808 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3809 3810 /* set preallocation */ 3811 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3812 if (!newisdense) { 3813 PetscInt *new_local_nnz=0; 3814 3815 ptr_vals = recv_buffer_vals; 3816 ptr_idxs = recv_buffer_idxs_local; 3817 if (n_recvs) { 3818 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3819 } 3820 for (i=0;i<n_recvs;i++) { 3821 PetscInt j; 3822 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3823 for (j=0;j<*(ptr_idxs+1);j++) { 3824 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3825 } 3826 } else { 3827 /* TODO */ 3828 } 3829 ptr_idxs += olengths_idxs[i]; 3830 } 3831 if (new_local_nnz) { 3832 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3833 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3834 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3835 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3836 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3837 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3838 } else { 3839 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3840 } 3841 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3842 } else { 3843 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3844 } 3845 3846 /* set values */ 3847 ptr_vals = recv_buffer_vals; 3848 ptr_idxs = recv_buffer_idxs_local; 3849 for (i=0;i<n_recvs;i++) { 3850 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3851 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3852 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3853 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3854 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3855 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3856 } else { 3857 /* TODO */ 3858 } 3859 ptr_idxs += olengths_idxs[i]; 3860 ptr_vals += olengths_vals[i]; 3861 } 3862 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3863 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3864 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3865 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3866 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3867 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3868 3869 #if 0 3870 if (!restrict_comm) { /* check */ 3871 Vec lvec,rvec; 3872 PetscReal infty_error; 3873 3874 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3875 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3876 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3877 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3878 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3879 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3880 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3881 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3882 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3883 } 3884 #endif 3885 3886 /* assemble new additional is (if any) */ 3887 if (nis) { 3888 PetscInt **temp_idxs,*count_is,j,psum; 3889 3890 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3891 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3892 ptr_idxs = recv_buffer_idxs_is; 3893 psum = 0; 3894 for (i=0;i<n_recvs;i++) { 3895 for (j=0;j<nis;j++) { 3896 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3897 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3898 psum += plen; 3899 ptr_idxs += plen+1; /* shift pointer to received data */ 3900 } 3901 } 3902 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3903 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3904 for (i=1;i<nis;i++) { 3905 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3906 } 3907 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3908 ptr_idxs = recv_buffer_idxs_is; 3909 for (i=0;i<n_recvs;i++) { 3910 for (j=0;j<nis;j++) { 3911 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3912 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3913 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3914 ptr_idxs += plen+1; /* shift pointer to received data */ 3915 } 3916 } 3917 for (i=0;i<nis;i++) { 3918 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3919 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3920 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3921 } 3922 ierr = PetscFree(count_is);CHKERRQ(ierr); 3923 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3924 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3925 } 3926 /* free workspace */ 3927 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3928 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3929 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3930 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3931 if (isdense) { 3932 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3933 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3934 } else { 3935 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3936 } 3937 if (nis) { 3938 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3939 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3940 } 3941 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3942 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3943 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3944 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3945 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3946 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3947 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3948 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3949 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3950 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3951 ierr = PetscFree(onodes);CHKERRQ(ierr); 3952 if (nis) { 3953 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3954 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3955 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3956 } 3957 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3958 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3959 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3960 for (i=0;i<nis;i++) { 3961 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3962 } 3963 *mat_n = NULL; 3964 } 3965 PetscFunctionReturn(0); 3966 } 3967 3968 /* temporary hack into ksp private data structure */ 3969 #include <petsc/private/kspimpl.h> 3970 3971 #undef __FUNCT__ 3972 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3973 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3974 { 3975 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3976 PC_IS *pcis = (PC_IS*)pc->data; 3977 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3978 MatNullSpace CoarseNullSpace=NULL; 3979 ISLocalToGlobalMapping coarse_islg; 3980 IS coarse_is,*isarray; 3981 PetscInt i,im_active=-1,active_procs=-1; 3982 PetscInt nis,nisdofs,nisneu; 3983 PC pc_temp; 3984 PCType coarse_pc_type; 3985 KSPType coarse_ksp_type; 3986 PetscBool multilevel_requested,multilevel_allowed; 3987 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3988 Mat t_coarse_mat_is; 3989 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3990 PetscMPIInt all_procs; 3991 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3992 PetscBool compute_vecs = PETSC_FALSE; 3993 PetscScalar *array; 3994 PetscErrorCode ierr; 3995 3996 PetscFunctionBegin; 3997 /* Assign global numbering to coarse dofs */ 3998 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3999 PetscInt ocoarse_size; 4000 compute_vecs = PETSC_TRUE; 4001 ocoarse_size = pcbddc->coarse_size; 4002 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4003 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4004 /* see if we can avoid some work */ 4005 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4006 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4007 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4008 PC pc; 4009 PetscBool isbddc; 4010 4011 /* temporary workaround since PCBDDC does not have a reset method so far */ 4012 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4013 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4014 if (isbddc) { 4015 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4016 } 4017 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4018 coarse_reuse = PETSC_FALSE; 4019 } else { /* we can safely reuse already computed coarse matrix */ 4020 coarse_reuse = PETSC_TRUE; 4021 } 4022 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4023 coarse_reuse = PETSC_FALSE; 4024 } 4025 /* reset any subassembling information */ 4026 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4027 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4028 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4029 coarse_reuse = PETSC_TRUE; 4030 } 4031 4032 /* count "active" (i.e. with positive local size) and "void" processes */ 4033 im_active = !!(pcis->n); 4034 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4035 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4036 void_procs = all_procs-active_procs; 4037 csin_type_simple = PETSC_TRUE; 4038 redist = PETSC_FALSE; 4039 if (pcbddc->current_level && void_procs) { 4040 csin_ml = PETSC_TRUE; 4041 ncoarse_ml = void_procs; 4042 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4043 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4044 csin_ds = PETSC_TRUE; 4045 ncoarse_ds = pcbddc->redistribute_coarse; 4046 redist = PETSC_TRUE; 4047 } else { 4048 csin_ds = PETSC_TRUE; 4049 ncoarse_ds = active_procs; 4050 redist = PETSC_TRUE; 4051 } 4052 } else { 4053 csin_ml = PETSC_FALSE; 4054 ncoarse_ml = all_procs; 4055 if (void_procs) { 4056 csin_ds = PETSC_TRUE; 4057 ncoarse_ds = void_procs; 4058 csin_type_simple = PETSC_FALSE; 4059 } else { 4060 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4061 csin_ds = PETSC_TRUE; 4062 ncoarse_ds = pcbddc->redistribute_coarse; 4063 redist = PETSC_TRUE; 4064 } else { 4065 csin_ds = PETSC_FALSE; 4066 ncoarse_ds = all_procs; 4067 } 4068 } 4069 } 4070 4071 /* 4072 test if we can go multilevel: three conditions must be satisfied: 4073 - we have not exceeded the number of levels requested 4074 - we can actually subassemble the active processes 4075 - we can find a suitable number of MPI processes where we can place the subassembled problem 4076 */ 4077 multilevel_allowed = PETSC_FALSE; 4078 multilevel_requested = PETSC_FALSE; 4079 if (pcbddc->current_level < pcbddc->max_levels) { 4080 multilevel_requested = PETSC_TRUE; 4081 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4082 multilevel_allowed = PETSC_FALSE; 4083 } else { 4084 multilevel_allowed = PETSC_TRUE; 4085 } 4086 } 4087 /* determine number of process partecipating to coarse solver */ 4088 if (multilevel_allowed) { 4089 ncoarse = ncoarse_ml; 4090 csin = csin_ml; 4091 redist = PETSC_FALSE; 4092 } else { 4093 ncoarse = ncoarse_ds; 4094 csin = csin_ds; 4095 } 4096 4097 /* creates temporary l2gmap and IS for coarse indexes */ 4098 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4099 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4100 4101 /* creates temporary MATIS object for coarse matrix */ 4102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4103 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4104 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4105 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4106 #if 0 4107 { 4108 PetscViewer viewer; 4109 char filename[256]; 4110 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4111 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4112 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4113 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4114 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 4115 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4116 } 4117 #endif 4118 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4119 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4120 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4121 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4122 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4123 4124 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4125 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4126 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4127 const PetscInt *idxs; 4128 ISLocalToGlobalMapping tmap; 4129 4130 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4131 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4132 /* allocate space for temporary storage */ 4133 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4134 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4135 /* allocate for IS array */ 4136 nisdofs = pcbddc->n_ISForDofsLocal; 4137 nisneu = !!pcbddc->NeumannBoundariesLocal; 4138 nis = nisdofs + nisneu; 4139 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4140 /* dofs splitting */ 4141 for (i=0;i<nisdofs;i++) { 4142 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4143 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4144 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4145 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4146 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4147 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4149 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4150 } 4151 /* neumann boundaries */ 4152 if (pcbddc->NeumannBoundariesLocal) { 4153 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4154 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4155 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4156 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4157 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4158 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4159 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4160 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4161 } 4162 /* free memory */ 4163 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4164 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4165 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4166 } else { 4167 nis = 0; 4168 nisdofs = 0; 4169 nisneu = 0; 4170 isarray = NULL; 4171 } 4172 /* destroy no longer needed map */ 4173 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4174 4175 /* restrict on coarse candidates (if needed) */ 4176 coarse_mat_is = NULL; 4177 if (csin) { 4178 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4179 if (redist) { 4180 PetscMPIInt rank; 4181 PetscInt spc,n_spc_p1,dest[1],destsize; 4182 4183 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4184 spc = active_procs/ncoarse; 4185 n_spc_p1 = active_procs%ncoarse; 4186 if (im_active) { 4187 destsize = 1; 4188 if (rank > n_spc_p1*(spc+1)-1) { 4189 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4190 } else { 4191 dest[0] = rank/(spc+1); 4192 } 4193 } else { 4194 destsize = 0; 4195 } 4196 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4197 } else if (csin_type_simple) { 4198 PetscMPIInt rank; 4199 PetscInt issize,isidx; 4200 4201 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4202 if (im_active) { 4203 issize = 1; 4204 isidx = (PetscInt)rank; 4205 } else { 4206 issize = 0; 4207 isidx = -1; 4208 } 4209 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4210 } else { /* get a suitable subassembling pattern from MATIS code */ 4211 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4212 } 4213 4214 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4215 if (!redist || ncoarse <= void_procs) { 4216 PetscInt ncoarse_cand,tissize,*nisindices; 4217 PetscInt *coarse_candidates; 4218 const PetscInt* tisindices; 4219 4220 /* get coarse candidates' ranks in pc communicator */ 4221 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4222 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4223 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4224 if (!coarse_candidates[i]) { 4225 coarse_candidates[ncoarse_cand++]=i; 4226 } 4227 } 4228 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4229 4230 4231 if (pcbddc->dbg_flag) { 4232 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4233 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4234 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4235 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4236 for (i=0;i<ncoarse_cand;i++) { 4237 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4238 } 4239 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4240 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4241 } 4242 /* shift the pattern on coarse candidates */ 4243 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4244 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4245 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4246 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4247 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4248 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4249 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4250 } 4251 if (pcbddc->dbg_flag) { 4252 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4253 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4254 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4255 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4256 } 4257 } 4258 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4259 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4260 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4261 } else { /* this is the last level, so use just receiving processes in subcomm */ 4262 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4263 } 4264 } else { 4265 if (pcbddc->dbg_flag) { 4266 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4267 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4268 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4269 } 4270 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4271 coarse_mat_is = t_coarse_mat_is; 4272 } 4273 4274 /* create local to global scatters for coarse problem */ 4275 if (compute_vecs) { 4276 PetscInt lrows; 4277 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4278 if (coarse_mat_is) { 4279 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4280 } else { 4281 lrows = 0; 4282 } 4283 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4284 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4285 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4286 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4287 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4288 } 4289 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4290 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4291 4292 /* set defaults for coarse KSP and PC */ 4293 if (multilevel_allowed) { 4294 coarse_ksp_type = KSPRICHARDSON; 4295 coarse_pc_type = PCBDDC; 4296 } else { 4297 coarse_ksp_type = KSPPREONLY; 4298 coarse_pc_type = PCREDUNDANT; 4299 } 4300 4301 /* print some info if requested */ 4302 if (pcbddc->dbg_flag) { 4303 if (!multilevel_allowed) { 4304 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4305 if (multilevel_requested) { 4306 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4307 } else if (pcbddc->max_levels) { 4308 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4309 } 4310 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4311 } 4312 } 4313 4314 /* create the coarse KSP object only once with defaults */ 4315 if (coarse_mat_is) { 4316 MatReuse coarse_mat_reuse; 4317 PetscViewer dbg_viewer = NULL; 4318 if (pcbddc->dbg_flag) { 4319 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4320 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4321 } 4322 if (!pcbddc->coarse_ksp) { 4323 char prefix[256],str_level[16]; 4324 size_t len; 4325 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4326 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4327 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4328 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4329 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4330 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4331 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4332 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4333 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4334 /* prefix */ 4335 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4336 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4337 if (!pcbddc->current_level) { 4338 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4339 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4340 } else { 4341 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4342 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4343 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4344 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4345 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4346 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4347 } 4348 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4349 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4350 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4351 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4352 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4353 /* allow user customization */ 4354 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4355 } 4356 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4357 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4358 if (nisdofs) { 4359 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4360 for (i=0;i<nisdofs;i++) { 4361 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4362 } 4363 } 4364 if (nisneu) { 4365 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4366 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4367 } 4368 4369 /* get some info after set from options */ 4370 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4371 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4372 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4373 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4374 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4375 isbddc = PETSC_FALSE; 4376 } 4377 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4378 if (isredundant) { 4379 KSP inner_ksp; 4380 PC inner_pc; 4381 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4382 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4383 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4384 } 4385 4386 /* assemble coarse matrix */ 4387 if (coarse_reuse) { 4388 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4389 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4390 coarse_mat_reuse = MAT_REUSE_MATRIX; 4391 } else { 4392 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4393 } 4394 if (isbddc || isnn) { 4395 if (pcbddc->coarsening_ratio > 1) { 4396 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4397 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4398 if (pcbddc->dbg_flag) { 4399 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4400 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4401 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4402 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4403 } 4404 } 4405 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4406 } else { 4407 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4408 coarse_mat = coarse_mat_is; 4409 } 4410 } else { 4411 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4412 } 4413 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4414 4415 /* propagate symmetry info of coarse matrix */ 4416 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4417 if (pc->pmat->symmetric_set) { 4418 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4419 } 4420 if (pc->pmat->hermitian_set) { 4421 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4422 } 4423 if (pc->pmat->spd_set) { 4424 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4425 } 4426 /* set operators */ 4427 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4428 if (pcbddc->dbg_flag) { 4429 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4430 } 4431 } else { /* processes non partecipating to coarse solver (if any) */ 4432 coarse_mat = 0; 4433 } 4434 ierr = PetscFree(isarray);CHKERRQ(ierr); 4435 #if 0 4436 { 4437 PetscViewer viewer; 4438 char filename[256]; 4439 sprintf(filename,"coarse_mat.m"); 4440 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4441 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4442 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4443 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 4444 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4445 } 4446 #endif 4447 4448 /* Compute coarse null space (special handling by BDDC only) */ 4449 #if 0 4450 if (pcbddc->NullSpace) { 4451 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4452 } 4453 #endif 4454 4455 if (pcbddc->coarse_ksp) { 4456 Vec crhs,csol; 4457 PetscBool ispreonly; 4458 4459 if (CoarseNullSpace) { 4460 if (isbddc) { 4461 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4462 } else { 4463 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4464 } 4465 } 4466 /* setup coarse ksp */ 4467 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4468 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4469 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4470 /* hack */ 4471 if (!csol) { 4472 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4473 } 4474 if (!crhs) { 4475 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4476 } 4477 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4478 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4479 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4480 KSP check_ksp; 4481 KSPType check_ksp_type; 4482 PC check_pc; 4483 Vec check_vec,coarse_vec; 4484 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4485 PetscInt its; 4486 PetscBool compute_eigs; 4487 PetscReal *eigs_r,*eigs_c; 4488 PetscInt neigs; 4489 const char *prefix; 4490 4491 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4492 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4493 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4494 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4495 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4496 if (ispreonly) { 4497 check_ksp_type = KSPPREONLY; 4498 compute_eigs = PETSC_FALSE; 4499 } else { 4500 check_ksp_type = KSPGMRES; 4501 compute_eigs = PETSC_TRUE; 4502 } 4503 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4504 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4505 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4506 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4507 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4508 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4509 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4510 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4511 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4512 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4513 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4514 /* create random vec */ 4515 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4516 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4517 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4518 if (CoarseNullSpace) { 4519 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4520 } 4521 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4522 /* solve coarse problem */ 4523 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4524 if (CoarseNullSpace) { 4525 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4526 } 4527 /* set eigenvalue estimation if preonly has not been requested */ 4528 if (compute_eigs) { 4529 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4530 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4531 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4532 lambda_max = eigs_r[neigs-1]; 4533 lambda_min = eigs_r[0]; 4534 if (pcbddc->use_coarse_estimates) { 4535 if (lambda_max>lambda_min) { 4536 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4537 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4538 } 4539 } 4540 } 4541 4542 /* check coarse problem residual error */ 4543 if (pcbddc->dbg_flag) { 4544 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4545 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4546 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4547 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4548 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4549 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4550 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4551 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4552 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4553 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4554 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4555 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4556 if (compute_eigs) { 4557 PetscReal lambda_max_s,lambda_min_s; 4558 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4559 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4560 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4561 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4562 for (i=0;i<neigs;i++) { 4563 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4564 } 4565 } 4566 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4567 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4568 } 4569 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4570 if (compute_eigs) { 4571 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4572 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4573 } 4574 } 4575 } 4576 /* print additional info */ 4577 if (pcbddc->dbg_flag) { 4578 /* waits until all processes reaches this point */ 4579 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4580 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4581 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4582 } 4583 4584 /* free memory */ 4585 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4586 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4587 PetscFunctionReturn(0); 4588 } 4589 4590 #undef __FUNCT__ 4591 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4592 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4593 { 4594 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4595 PC_IS* pcis = (PC_IS*)pc->data; 4596 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4597 IS subset,subset_mult,subset_n; 4598 PetscInt local_size,coarse_size=0; 4599 PetscInt *local_primal_indices=NULL; 4600 const PetscInt *t_local_primal_indices; 4601 PetscErrorCode ierr; 4602 4603 PetscFunctionBegin; 4604 /* Compute global number of coarse dofs */ 4605 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4606 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4607 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4608 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4609 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4610 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4611 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4612 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4613 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4614 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 4615 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4616 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4617 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4618 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4619 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4620 4621 /* check numbering */ 4622 if (pcbddc->dbg_flag) { 4623 PetscScalar coarsesum,*array; 4624 PetscInt i; 4625 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4626 4627 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4628 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4629 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4630 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4631 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4632 for (i=0;i<pcbddc->local_primal_size;i++) { 4633 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4634 } 4635 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4636 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4637 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4638 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4639 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4640 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4641 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4642 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4643 for (i=0;i<pcis->n;i++) { 4644 if (array[i] == 1.0) { 4645 set_error = PETSC_TRUE; 4646 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4647 } 4648 } 4649 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4650 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4651 for (i=0;i<pcis->n;i++) { 4652 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4653 } 4654 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4655 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4656 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4657 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4658 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4660 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4661 PetscInt *gidxs; 4662 4663 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4664 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4665 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4666 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4667 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4668 for (i=0;i<pcbddc->local_primal_size;i++) { 4669 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4670 } 4671 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4672 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4673 } 4674 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4675 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4676 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4677 } 4678 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4679 /* get back data */ 4680 *coarse_size_n = coarse_size; 4681 *local_primal_indices_n = local_primal_indices; 4682 PetscFunctionReturn(0); 4683 } 4684 4685 #undef __FUNCT__ 4686 #define __FUNCT__ "PCBDDCGlobalToLocal" 4687 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4688 { 4689 IS localis_t; 4690 PetscInt i,lsize,*idxs,n; 4691 PetscScalar *vals; 4692 PetscErrorCode ierr; 4693 4694 PetscFunctionBegin; 4695 /* get indices in local ordering exploiting local to global map */ 4696 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4697 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4698 for (i=0;i<lsize;i++) vals[i] = 1.0; 4699 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4700 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4701 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4702 if (idxs) { /* multilevel guard */ 4703 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4704 } 4705 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4706 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4707 ierr = PetscFree(vals);CHKERRQ(ierr); 4708 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4709 /* now compute set in local ordering */ 4710 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4711 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4712 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4713 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4714 for (i=0,lsize=0;i<n;i++) { 4715 if (PetscRealPart(vals[i]) > 0.5) { 4716 lsize++; 4717 } 4718 } 4719 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4720 for (i=0,lsize=0;i<n;i++) { 4721 if (PetscRealPart(vals[i]) > 0.5) { 4722 idxs[lsize++] = i; 4723 } 4724 } 4725 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4726 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4727 *localis = localis_t; 4728 PetscFunctionReturn(0); 4729 } 4730 4731 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4732 #undef __FUNCT__ 4733 #define __FUNCT__ "PCBDDCMatMult_Private" 4734 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4735 { 4736 PCBDDCChange_ctx change_ctx; 4737 PetscErrorCode ierr; 4738 4739 PetscFunctionBegin; 4740 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4741 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4742 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4743 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4744 PetscFunctionReturn(0); 4745 } 4746 4747 #undef __FUNCT__ 4748 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4749 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4750 { 4751 PCBDDCChange_ctx change_ctx; 4752 PetscErrorCode ierr; 4753 4754 PetscFunctionBegin; 4755 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4756 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4757 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4758 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4759 PetscFunctionReturn(0); 4760 } 4761 4762 #undef __FUNCT__ 4763 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4764 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4765 { 4766 PC_IS *pcis=(PC_IS*)pc->data; 4767 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4768 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4769 Mat S_j; 4770 PetscInt *used_xadj,*used_adjncy; 4771 PetscBool free_used_adj; 4772 PetscErrorCode ierr; 4773 4774 PetscFunctionBegin; 4775 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4776 free_used_adj = PETSC_FALSE; 4777 if (pcbddc->sub_schurs_layers == -1) { 4778 used_xadj = NULL; 4779 used_adjncy = NULL; 4780 } else { 4781 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4782 used_xadj = pcbddc->mat_graph->xadj; 4783 used_adjncy = pcbddc->mat_graph->adjncy; 4784 } else if (pcbddc->computed_rowadj) { 4785 used_xadj = pcbddc->mat_graph->xadj; 4786 used_adjncy = pcbddc->mat_graph->adjncy; 4787 } else { 4788 PetscBool flg_row=PETSC_FALSE; 4789 const PetscInt *xadj,*adjncy; 4790 PetscInt nvtxs; 4791 4792 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4793 if (flg_row) { 4794 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4795 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4796 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4797 free_used_adj = PETSC_TRUE; 4798 } else { 4799 pcbddc->sub_schurs_layers = -1; 4800 used_xadj = NULL; 4801 used_adjncy = NULL; 4802 } 4803 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4804 } 4805 } 4806 4807 /* setup sub_schurs data */ 4808 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4809 if (!sub_schurs->use_mumps) { 4810 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4811 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4812 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4813 } else { 4814 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4815 PetscBool isseqaij; 4816 if (!pcbddc->use_vertices && reuse_solvers) { 4817 PetscInt n_vertices; 4818 4819 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4820 reuse_solvers = (PetscBool)!n_vertices; 4821 } 4822 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4823 if (!isseqaij) { 4824 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4825 if (matis->A == pcbddc->local_mat) { 4826 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4827 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4828 } else { 4829 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4830 } 4831 } 4832 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4833 } 4834 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4835 4836 /* free adjacency */ 4837 if (free_used_adj) { 4838 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4839 } 4840 PetscFunctionReturn(0); 4841 } 4842 4843 #undef __FUNCT__ 4844 #define __FUNCT__ "PCBDDCInitSubSchurs" 4845 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4846 { 4847 PC_IS *pcis=(PC_IS*)pc->data; 4848 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4849 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4850 PCBDDCGraph graph; 4851 PetscErrorCode ierr; 4852 4853 PetscFunctionBegin; 4854 /* attach interface graph for determining subsets */ 4855 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4856 IS verticesIS,verticescomm; 4857 PetscInt vsize,*idxs; 4858 4859 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4860 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4861 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4862 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4863 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4864 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4865 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4866 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4867 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4868 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4869 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4870 /* 4871 if (pcbddc->dbg_flag) { 4872 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4873 } 4874 */ 4875 } else { 4876 graph = pcbddc->mat_graph; 4877 } 4878 4879 /* sub_schurs init */ 4880 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4881 4882 /* free graph struct */ 4883 if (pcbddc->sub_schurs_rebuild) { 4884 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4885 } 4886 PetscFunctionReturn(0); 4887 } 4888