1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 29 30 if (pcbddc->dbg_flag) { 31 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 32 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 33 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 35 } 36 37 if (pcbddc->dbg_flag) { 38 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 39 } 40 41 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 42 43 /* max size of subsets */ 44 mss = 0; 45 for (i=0;i<sub_schurs->n_subs;i++) { 46 PetscInt subset_size; 47 48 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 49 mss = PetscMax(mss,subset_size); 50 } 51 52 /* min/max and threshold */ 53 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 54 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 55 nmax = PetscMax(nmin,nmax); 56 allocated_S_St = PETSC_FALSE; 57 if (nmin) { 58 allocated_S_St = PETSC_TRUE; 59 } 60 61 /* allocate lapack workspace */ 62 cum = cum2 = 0; 63 maxneigs = 0; 64 for (i=0;i<sub_schurs->n_subs;i++) { 65 PetscInt n,subset_size; 66 67 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 68 n = PetscMin(subset_size,nmax); 69 cum += subset_size; 70 cum2 += subset_size*n; 71 maxneigs = PetscMax(maxneigs,n); 72 } 73 if (mss) { 74 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 75 PetscBLASInt B_itype = 1; 76 PetscBLASInt B_N = mss; 77 PetscReal zero = 0.0; 78 PetscReal eps = 0.0; /* dlamch? */ 79 80 B_lwork = -1; 81 S = NULL; 82 St = NULL; 83 eigs = NULL; 84 eigv = NULL; 85 B_iwork = NULL; 86 B_ifail = NULL; 87 #if defined(PETSC_USE_COMPLEX) 88 rwork = NULL; 89 #endif 90 thresh = 1.0; 91 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 92 #if defined(PETSC_USE_COMPLEX) 93 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 94 #else 95 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 96 #endif 97 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 98 ierr = PetscFPTrapPop();CHKERRQ(ierr); 99 } else { 100 /* TODO */ 101 } 102 } else { 103 lwork = 0; 104 } 105 106 nv = 0; 107 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 108 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 109 } 110 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 111 if (allocated_S_St) { 112 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 113 } 114 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 115 #if defined(PETSC_USE_COMPLEX) 116 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 117 #endif 118 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 119 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 121 nv+cum,&pcbddc->adaptive_constraints_idxs, 122 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 123 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 124 125 maxneigs = 0; 126 cum = cum2 = cumarray = 0; 127 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 128 pcbddc->adaptive_constraints_data_ptr[0] = 0; 129 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 130 const PetscInt *idxs; 131 132 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 133 for (cum=0;cum<nv;cum++) { 134 pcbddc->adaptive_constraints_n[cum] = 1; 135 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 136 pcbddc->adaptive_constraints_data[cum] = 1.0; 137 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 138 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 139 } 140 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 141 } 142 143 if (mss) { /* multilevel */ 144 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 145 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 146 } 147 148 for (i=0;i<sub_schurs->n_subs;i++) { 149 150 const PetscInt *idxs; 151 PetscReal infty = PETSC_MAX_REAL; 152 PetscInt j,subset_size,eigs_start = 0; 153 PetscBLASInt B_N; 154 PetscBool same_data = PETSC_FALSE; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 158 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 159 if (sub_schurs->is_hermitian) { 160 PetscInt j,k; 161 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 162 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 163 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 } 165 for (j=0;j<subset_size;j++) { 166 for (k=j;k<subset_size;k++) { 167 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 168 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 169 } 170 } 171 } else { 172 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 173 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 174 } 175 } else { 176 S = Sarray + cumarray; 177 St = Starray + cumarray; 178 } 179 180 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 181 /* see if we can save some work */ 182 if (sub_schurs->n_subs == 1) { 183 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 184 } 185 186 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 187 B_neigs = 0; 188 } else { 189 /* Threshold: this is an heuristic for edges */ 190 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 191 192 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 193 PetscBLASInt B_itype = 1; 194 PetscBLASInt B_IL, B_IU; 195 PetscReal eps = -1.0; /* dlamch? */ 196 PetscInt nmin_s; 197 198 /* ask for eigenvalues larger than thresh */ 199 if (pcbddc->dbg_flag) { 200 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 201 } 202 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 203 #if defined(PETSC_USE_COMPLEX) 204 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 205 #else 206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 207 #endif 208 ierr = PetscFPTrapPop();CHKERRQ(ierr); 209 if (B_ierr) { 210 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 211 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 212 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 213 } 214 215 if (B_neigs > nmax) { 216 if (pcbddc->dbg_flag) { 217 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 218 } 219 eigs_start = B_neigs -nmax; 220 B_neigs = nmax; 221 } 222 223 nmin_s = PetscMin(nmin,B_N); 224 if (B_neigs < nmin_s) { 225 PetscBLASInt B_neigs2; 226 227 B_IU = B_N - B_neigs; 228 B_IL = B_N - nmin_s + 1; 229 if (pcbddc->dbg_flag) { 230 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 231 } 232 if (sub_schurs->is_hermitian) { 233 PetscInt j; 234 for (j=0;j<subset_size;j++) { 235 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 236 } 237 for (j=0;j<subset_size;j++) { 238 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 239 } 240 } else { 241 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 242 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 } 244 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 245 #if defined(PETSC_USE_COMPLEX) 246 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 247 #else 248 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 249 #endif 250 ierr = PetscFPTrapPop();CHKERRQ(ierr); 251 B_neigs += B_neigs2; 252 } 253 if (B_ierr) { 254 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 255 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 256 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 257 } 258 if (pcbddc->dbg_flag) { 259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 260 for (j=0;j<B_neigs;j++) { 261 if (eigs[j] == 0.0) { 262 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 263 } else { 264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 265 } 266 } 267 } 268 } else { 269 /* TODO */ 270 } 271 } 272 maxneigs = PetscMax(B_neigs,maxneigs); 273 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 274 if (B_neigs) { 275 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 276 277 if (pcbddc->dbg_flag > 1) { 278 PetscInt ii; 279 for (ii=0;ii<B_neigs;ii++) { 280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 281 for (j=0;j<B_N;j++) { 282 #if defined(PETSC_USE_COMPLEX) 283 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 284 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 286 #else 287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 288 #endif 289 } 290 } 291 } 292 #if 0 293 for (j=0;j<B_neigs;j++) { 294 PetscBLASInt Blas_N,Blas_one = 1.0; 295 PetscScalar norm; 296 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 297 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 298 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 299 if (pcbddc->adaptive_constraints_data[cum] > 0.0) { 300 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 301 } else { 302 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 303 } 304 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 305 } 306 #endif 307 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 308 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 309 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 310 cum++; 311 } 312 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 313 /* shift for next computation */ 314 cumarray += subset_size*subset_size; 315 } 316 if (pcbddc->dbg_flag) { 317 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 318 } 319 320 if (mss) { 321 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 322 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 323 /* destroy matrices (junk) */ 324 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 325 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 326 } 327 if (allocated_S_St) { 328 ierr = PetscFree2(S,St);CHKERRQ(ierr); 329 } 330 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 331 #if defined(PETSC_USE_COMPLEX) 332 ierr = PetscFree(rwork);CHKERRQ(ierr); 333 #endif 334 if (pcbddc->dbg_flag) { 335 PetscInt maxneigs_r; 336 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 337 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 338 } 339 PetscFunctionReturn(0); 340 } 341 342 #undef __FUNCT__ 343 #define __FUNCT__ "PCBDDCSetUpSolvers" 344 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 345 { 346 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 347 PetscScalar *coarse_submat_vals; 348 PetscErrorCode ierr; 349 350 PetscFunctionBegin; 351 /* Setup local scatters R_to_B and (optionally) R_to_D */ 352 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 353 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 354 355 /* Setup local neumann solver ksp_R */ 356 /* PCBDDCSetUpLocalScatters should be called first! */ 357 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 358 359 /* Change global null space passed in by the user if change of basis has been requested */ 360 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 361 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 362 } 363 364 /* 365 Setup local correction and local part of coarse basis. 366 Gives back the dense local part of the coarse matrix in column major ordering 367 */ 368 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 369 370 /* Compute total number of coarse nodes and setup coarse solver */ 371 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 372 373 /* free */ 374 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 375 PetscFunctionReturn(0); 376 } 377 378 #undef __FUNCT__ 379 #define __FUNCT__ "PCBDDCResetCustomization" 380 PetscErrorCode PCBDDCResetCustomization(PC pc) 381 { 382 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 383 PetscErrorCode ierr; 384 385 PetscFunctionBegin; 386 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 388 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 389 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 390 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 392 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 393 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 394 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 395 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 396 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 397 PetscFunctionReturn(0); 398 } 399 400 #undef __FUNCT__ 401 #define __FUNCT__ "PCBDDCResetTopography" 402 PetscErrorCode PCBDDCResetTopography(PC pc) 403 { 404 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 405 PetscErrorCode ierr; 406 407 PetscFunctionBegin; 408 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 409 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 411 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 412 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNCT__ 417 #define __FUNCT__ "PCBDDCResetSolvers" 418 PetscErrorCode PCBDDCResetSolvers(PC pc) 419 { 420 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 421 PetscScalar *array; 422 PetscErrorCode ierr; 423 424 PetscFunctionBegin; 425 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 426 if (pcbddc->coarse_phi_B) { 427 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 428 ierr = PetscFree(array);CHKERRQ(ierr); 429 } 430 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 431 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 432 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 434 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 436 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 437 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 438 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 440 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 441 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 442 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 443 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 444 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 445 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 446 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 447 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 448 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 449 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 450 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 451 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 452 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 458 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 459 { 460 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 461 PC_IS *pcis = (PC_IS*)pc->data; 462 VecType impVecType; 463 PetscInt n_constraints,n_R,old_size; 464 PetscErrorCode ierr; 465 466 PetscFunctionBegin; 467 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 468 /* get sizes */ 469 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 470 n_R = pcis->n-pcbddc->n_vertices; 471 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 472 /* local work vectors (try to avoid unneeded work)*/ 473 /* R nodes */ 474 old_size = -1; 475 if (pcbddc->vec1_R) { 476 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 477 } 478 if (n_R != old_size) { 479 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 480 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 481 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 482 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 483 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 484 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 485 } 486 /* local primal dofs */ 487 old_size = -1; 488 if (pcbddc->vec1_P) { 489 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 490 } 491 if (pcbddc->local_primal_size != old_size) { 492 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 493 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 494 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 495 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 496 } 497 /* local explicit constraints */ 498 old_size = -1; 499 if (pcbddc->vec1_C) { 500 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 501 } 502 if (n_constraints && n_constraints != old_size) { 503 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 504 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 505 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 506 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 507 } 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNCT__ 512 #define __FUNCT__ "PCBDDCSetUpCorrection" 513 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 514 { 515 PetscErrorCode ierr; 516 /* pointers to pcis and pcbddc */ 517 PC_IS* pcis = (PC_IS*)pc->data; 518 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 519 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 520 /* submatrices of local problem */ 521 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 522 /* submatrices of local coarse problem */ 523 Mat S_VV,S_CV,S_VC,S_CC; 524 /* working matrices */ 525 Mat C_CR; 526 /* additional working stuff */ 527 PC pc_R; 528 Mat F; 529 PetscBool isLU,isCHOL,isILU; 530 531 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 532 PetscScalar *work; 533 PetscInt *idx_V_B; 534 PetscInt n,n_vertices,n_constraints; 535 PetscInt i,n_R,n_D,n_B; 536 PetscBool unsymmetric_check; 537 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 538 MatType impMatType; 539 /* some shortcuts to scalars */ 540 PetscScalar one=1.0,m_one=-1.0; 541 542 PetscFunctionBegin; 543 n_vertices = pcbddc->n_vertices; 544 n_constraints = pcbddc->local_primal_size-n_vertices; 545 /* Set Non-overlapping dimensions */ 546 n_B = pcis->n_B; 547 n_D = pcis->n - n_B; 548 n_R = pcis->n - n_vertices; 549 550 /* Set types for local objects needed by BDDC precondtioner */ 551 impMatType = MATSEQDENSE; 552 553 /* vertices in boundary numbering */ 554 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 555 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 556 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 557 558 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 559 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 560 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 561 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 562 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 563 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 564 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 565 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 566 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 567 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 568 569 unsymmetric_check = PETSC_FALSE; 570 /* allocate workspace */ 571 n = 0; 572 if (n_constraints) { 573 n += n_R*n_constraints; 574 } 575 if (n_vertices) { 576 n = PetscMax(2*n_R*n_vertices,n); 577 n = PetscMax((n_R+n_B)*n_vertices,n); 578 } 579 if (!pcbddc->symmetric_primal) { 580 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 581 unsymmetric_check = PETSC_TRUE; 582 } 583 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 584 585 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 586 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 587 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 590 if (isLU || isILU || isCHOL) { 591 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 592 } else if (sub_schurs->reuse_mumps) { 593 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 594 MatFactorType type; 595 596 F = reuse_mumps->F; 597 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 598 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 599 } else { 600 F = NULL; 601 } 602 603 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 604 if (n_constraints) { 605 Mat M1,M2,M3; 606 Mat auxmat; 607 IS is_aux; 608 PetscScalar *array,*array2; 609 610 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 611 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 612 613 /* Extract constraints on R nodes: C_{CR} */ 614 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 615 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 616 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 617 618 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 619 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 620 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 621 for (i=0;i<n_constraints;i++) { 622 const PetscScalar *row_cmat_values; 623 const PetscInt *row_cmat_indices; 624 PetscInt size_of_constraint,j; 625 626 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 627 for (j=0;j<size_of_constraint;j++) { 628 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 629 } 630 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 631 } 632 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 633 if (F) { 634 Mat B; 635 636 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 637 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 638 ierr = MatDestroy(&B);CHKERRQ(ierr); 639 } else { 640 PetscScalar *marr; 641 642 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 643 for (i=0;i<n_constraints;i++) { 644 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 645 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 646 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 647 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 648 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 649 } 650 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 651 } 652 if (!pcbddc->switch_static) { 653 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 654 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 655 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 656 for (i=0;i<n_constraints;i++) { 657 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 658 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 659 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 660 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 661 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 662 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 663 } 664 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 665 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 666 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 667 } else { 668 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 669 pcbddc->local_auxmat2 = local_auxmat2_R; 670 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 671 } 672 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 673 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 674 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 675 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 676 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 677 if (isCHOL) { 678 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 679 } else { 680 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 681 } 682 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 683 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 684 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 685 ierr = MatDestroy(&M2);CHKERRQ(ierr); 686 ierr = MatDestroy(&M3);CHKERRQ(ierr); 687 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 688 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 689 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 690 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 691 ierr = MatDestroy(&M1);CHKERRQ(ierr); 692 } 693 /* Get submatrices from subdomain matrix */ 694 if (n_vertices) { 695 IS is_aux; 696 697 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 698 IS tis; 699 700 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 701 ierr = ISSort(tis);CHKERRQ(ierr); 702 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 703 ierr = ISDestroy(&tis);CHKERRQ(ierr); 704 } else { 705 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 706 } 707 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 708 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 709 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 710 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 711 } 712 713 /* Matrix of coarse basis functions (local) */ 714 if (pcbddc->coarse_phi_B) { 715 PetscInt on_B,on_primal,on_D=n_D; 716 if (pcbddc->coarse_phi_D) { 717 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 718 } 719 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 720 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 721 PetscScalar *marray; 722 723 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 724 ierr = PetscFree(marray);CHKERRQ(ierr); 725 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 726 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 727 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 728 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 729 } 730 } 731 732 if (!pcbddc->coarse_phi_B) { 733 PetscScalar *marray; 734 735 n = n_B*pcbddc->local_primal_size; 736 if (pcbddc->switch_static || pcbddc->dbg_flag) { 737 n += n_D*pcbddc->local_primal_size; 738 } 739 if (!pcbddc->symmetric_primal) { 740 n *= 2; 741 } 742 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 743 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 744 n = n_B*pcbddc->local_primal_size; 745 if (pcbddc->switch_static || pcbddc->dbg_flag) { 746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 747 n += n_D*pcbddc->local_primal_size; 748 } 749 if (!pcbddc->symmetric_primal) { 750 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 751 if (pcbddc->switch_static || pcbddc->dbg_flag) { 752 n = n_B*pcbddc->local_primal_size; 753 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 754 } 755 } else { 756 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 757 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 758 if (pcbddc->switch_static || pcbddc->dbg_flag) { 759 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 760 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 761 } 762 } 763 } 764 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 765 /* vertices */ 766 if (n_vertices) { 767 768 ierr = MatConvert(A_VV,impMatType,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 769 770 if (n_R) { 771 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 772 PetscBLASInt B_N,B_one = 1; 773 PetscScalar *x,*y; 774 PetscBool isseqaij; 775 776 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 777 ierr = MatConvert(A_RV,impMatType,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 778 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 779 if (F) { /* TODO could be optimized for symmetric problems */ 780 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 781 } else { 782 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 783 for (i=0;i<n_vertices;i++) { 784 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 785 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 786 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 787 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 788 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 789 } 790 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 791 } 792 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 793 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 794 if (n_constraints) { 795 Mat B; 796 797 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 798 for (i=0;i<n_vertices;i++) { 799 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 800 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 801 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 802 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 803 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 804 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 805 } 806 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 807 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 808 ierr = MatDestroy(&B);CHKERRQ(ierr); 809 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 810 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 811 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 812 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 813 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 814 ierr = MatDestroy(&B);CHKERRQ(ierr); 815 } 816 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 817 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 818 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 819 } 820 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 821 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 822 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 823 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 824 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 825 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 826 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 827 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 828 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 829 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 830 } else { 831 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 832 } 833 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 834 /* coarse basis functions */ 835 for (i=0;i<n_vertices;i++) { 836 PetscScalar *y; 837 838 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 839 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 840 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 841 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 y[n_B*i+idx_V_B[i]] = 1.0; 844 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 845 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 846 847 if (pcbddc->switch_static || pcbddc->dbg_flag) { 848 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 849 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 850 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 851 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 852 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 853 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 854 } 855 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 856 } 857 /* if n_R == 0 the object is not destroyed */ 858 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 859 } 860 861 if (n_constraints) { 862 Mat B; 863 864 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 865 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 866 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 867 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 868 if (n_vertices) { 869 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 870 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 871 } else { 872 Mat S_VCt; 873 874 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 875 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 876 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 877 } 878 } 879 ierr = MatDestroy(&B);CHKERRQ(ierr); 880 /* coarse basis functions */ 881 for (i=0;i<n_constraints;i++) { 882 PetscScalar *y; 883 884 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 885 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 886 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 887 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 888 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 890 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 891 if (pcbddc->switch_static || pcbddc->dbg_flag) { 892 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 893 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 894 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 897 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 898 } 899 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 900 } 901 } 902 if (n_constraints) { 903 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 904 } 905 906 /* compute other basis functions for non-symmetric problems */ 907 if (!pcbddc->symmetric_primal) { 908 909 if (n_constraints) { 910 Mat S_CCT,B_C; 911 912 /* this is a lazy thing */ 913 ierr = MatConvert(C_CR,impMatType,MAT_INPLACE_MATRIX,&C_CR);CHKERRQ(ierr); 914 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 915 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 916 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 917 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 918 if (n_vertices) { 919 Mat B_V,S_VCT; 920 921 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 922 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 923 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 924 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 925 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 926 } 927 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 928 } else { /* if there are no constraints, reset work */ 929 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 930 } 931 if (n_vertices && n_R) { 932 Mat A_VRT; 933 PetscScalar *marray; 934 PetscBLASInt B_N,B_one = 1; 935 936 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 937 ierr = MatConvert(A_VRT,impMatType,MAT_INPLACE_MATRIX,&A_VRT);CHKERRQ(ierr); 938 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 939 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 940 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 941 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 942 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 943 } 944 945 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 946 for (i=0;i<pcbddc->local_primal_size;i++) { 947 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 948 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 949 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 950 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 951 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 952 } 953 } else { 954 for (i=0;i<pcbddc->local_primal_size;i++) { 955 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 956 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 957 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 958 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 959 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 960 } 961 } 962 /* coarse basis functions */ 963 for (i=0;i<pcbddc->local_primal_size;i++) { 964 PetscScalar *y; 965 966 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 967 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 968 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 969 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 970 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 971 if (i<n_vertices) { 972 y[n_B*i+idx_V_B[i]] = 1.0; 973 } 974 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 975 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 976 977 if (pcbddc->switch_static || pcbddc->dbg_flag) { 978 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 979 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 980 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 982 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 983 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 984 } 985 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 986 } 987 } 988 /* free memory */ 989 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 990 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 991 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 992 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 993 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 994 ierr = PetscFree(work);CHKERRQ(ierr); 995 if (n_vertices) { 996 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 997 } 998 if (n_constraints) { 999 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1000 } 1001 /* Checking coarse_sub_mat and coarse basis functios */ 1002 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1003 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1004 if (pcbddc->dbg_flag) { 1005 Mat coarse_sub_mat; 1006 Mat AUXMAT,TM1,TM2,TM3,TM4; 1007 Mat coarse_phi_D,coarse_phi_B; 1008 Mat coarse_psi_D,coarse_psi_B; 1009 Mat A_II,A_BB,A_IB,A_BI; 1010 Mat C_B,CPHI; 1011 IS is_dummy; 1012 Vec mones; 1013 MatType checkmattype=MATSEQAIJ; 1014 PetscReal real_value; 1015 1016 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1017 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1018 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1019 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1020 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1021 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1022 if (unsymmetric_check) { 1023 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1024 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1025 } 1026 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1027 1028 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1029 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1030 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1031 if (unsymmetric_check) { 1032 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1033 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1034 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1035 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1036 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1037 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1038 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1039 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1040 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1041 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1042 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1043 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1044 } else { 1045 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1046 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1047 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1048 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1049 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1050 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1051 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1052 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1053 } 1054 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1055 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1056 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1057 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 1058 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1059 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1060 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1061 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1062 1063 /* check constraints */ 1064 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1065 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1066 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1067 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1068 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1069 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1070 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1071 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1072 if (unsymmetric_check) { 1073 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1074 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1075 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1076 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1077 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1078 } 1079 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1080 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1081 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1082 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1083 1084 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1085 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1086 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1087 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1088 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1089 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1090 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1091 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1092 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1093 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1094 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1095 if (unsymmetric_check) { 1096 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1097 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1098 } 1099 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1100 } 1101 /* get back data */ 1102 *coarse_submat_vals_n = coarse_submat_vals; 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1108 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1109 { 1110 Mat *work_mat; 1111 IS isrow_s,iscol_s; 1112 PetscBool rsorted,csorted; 1113 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1114 PetscErrorCode ierr; 1115 1116 PetscFunctionBegin; 1117 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1118 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1119 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1120 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1121 1122 if (!rsorted) { 1123 const PetscInt *idxs; 1124 PetscInt *idxs_sorted,i; 1125 1126 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1127 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1128 for (i=0;i<rsize;i++) { 1129 idxs_perm_r[i] = i; 1130 } 1131 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1132 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1133 for (i=0;i<rsize;i++) { 1134 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1135 } 1136 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1137 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1138 } else { 1139 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1140 isrow_s = isrow; 1141 } 1142 1143 if (!csorted) { 1144 if (isrow == iscol) { 1145 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1146 iscol_s = isrow_s; 1147 } else { 1148 const PetscInt *idxs; 1149 PetscInt *idxs_sorted,i; 1150 1151 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1152 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1153 for (i=0;i<csize;i++) { 1154 idxs_perm_c[i] = i; 1155 } 1156 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1157 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1158 for (i=0;i<csize;i++) { 1159 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1160 } 1161 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1162 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1163 } 1164 } else { 1165 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1166 iscol_s = iscol; 1167 } 1168 1169 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1170 1171 if (!rsorted || !csorted) { 1172 Mat new_mat; 1173 IS is_perm_r,is_perm_c; 1174 1175 if (!rsorted) { 1176 PetscInt *idxs_r,i; 1177 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1178 for (i=0;i<rsize;i++) { 1179 idxs_r[idxs_perm_r[i]] = i; 1180 } 1181 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1182 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1183 } else { 1184 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1185 } 1186 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1187 1188 if (!csorted) { 1189 if (isrow_s == iscol_s) { 1190 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1191 is_perm_c = is_perm_r; 1192 } else { 1193 PetscInt *idxs_c,i; 1194 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1195 for (i=0;i<csize;i++) { 1196 idxs_c[idxs_perm_c[i]] = i; 1197 } 1198 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1199 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1200 } 1201 } else { 1202 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1203 } 1204 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1205 1206 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1207 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1208 work_mat[0] = new_mat; 1209 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1210 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1211 } 1212 1213 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1214 *B = work_mat[0]; 1215 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1216 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1217 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1218 PetscFunctionReturn(0); 1219 } 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1223 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1224 { 1225 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1226 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1227 Mat new_mat; 1228 IS is_local,is_global; 1229 PetscInt local_size; 1230 PetscBool isseqaij; 1231 PetscErrorCode ierr; 1232 1233 PetscFunctionBegin; 1234 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1235 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1236 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1237 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1238 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1239 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1240 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1241 1242 /* check */ 1243 if (pcbddc->dbg_flag) { 1244 Vec x,x_change; 1245 PetscReal error; 1246 1247 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1248 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1249 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1250 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1251 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1252 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1253 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1254 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1255 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1256 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1257 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1258 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1259 ierr = VecDestroy(&x);CHKERRQ(ierr); 1260 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1261 } 1262 1263 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1264 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1265 if (isseqaij) { 1266 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1267 } else { 1268 Mat work_mat; 1269 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1270 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1271 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1272 } 1273 if (matis->A->symmetric_set) { 1274 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1275 #if !defined(PETSC_USE_COMPLEX) 1276 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1277 #endif 1278 } 1279 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1285 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1286 { 1287 PC_IS* pcis = (PC_IS*)(pc->data); 1288 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1289 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1290 PetscInt *idx_R_local=NULL; 1291 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1292 PetscInt vbs,bs; 1293 PetscBT bitmask=NULL; 1294 PetscErrorCode ierr; 1295 1296 PetscFunctionBegin; 1297 /* 1298 No need to setup local scatters if 1299 - primal space is unchanged 1300 AND 1301 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1302 AND 1303 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1304 */ 1305 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1306 PetscFunctionReturn(0); 1307 } 1308 /* destroy old objects */ 1309 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1310 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1311 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1312 /* Set Non-overlapping dimensions */ 1313 n_B = pcis->n_B; 1314 n_D = pcis->n - n_B; 1315 n_vertices = pcbddc->n_vertices; 1316 1317 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1318 1319 /* create auxiliary bitmask and allocate workspace */ 1320 if (!sub_schurs->reuse_mumps) { 1321 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1322 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1323 for (i=0;i<n_vertices;i++) { 1324 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1325 } 1326 1327 for (i=0, n_R=0; i<pcis->n; i++) { 1328 if (!PetscBTLookup(bitmask,i)) { 1329 idx_R_local[n_R++] = i; 1330 } 1331 } 1332 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1333 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1334 1335 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1336 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1337 } 1338 1339 /* Block code */ 1340 vbs = 1; 1341 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1342 if (bs>1 && !(n_vertices%bs)) { 1343 PetscBool is_blocked = PETSC_TRUE; 1344 PetscInt *vary; 1345 if (!sub_schurs->reuse_mumps) { 1346 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1347 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1348 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1349 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1350 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1351 for (i=0; i<pcis->n/bs; i++) { 1352 if (vary[i]!=0 && vary[i]!=bs) { 1353 is_blocked = PETSC_FALSE; 1354 break; 1355 } 1356 } 1357 ierr = PetscFree(vary);CHKERRQ(ierr); 1358 } else { 1359 /* Verify directly the R set */ 1360 for (i=0; i<n_R/bs; i++) { 1361 PetscInt j,node=idx_R_local[bs*i]; 1362 for (j=1; j<bs; j++) { 1363 if (node != idx_R_local[bs*i+j]-j) { 1364 is_blocked = PETSC_FALSE; 1365 break; 1366 } 1367 } 1368 } 1369 } 1370 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1371 vbs = bs; 1372 for (i=0;i<n_R/vbs;i++) { 1373 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1374 } 1375 } 1376 } 1377 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1378 if (sub_schurs->reuse_mumps) { 1379 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1380 1381 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1382 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1383 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1384 reuse_mumps->is_R = pcbddc->is_R_local; 1385 } else { 1386 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1387 } 1388 1389 /* print some info if requested */ 1390 if (pcbddc->dbg_flag) { 1391 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1392 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1393 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1394 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1395 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1396 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1397 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1398 } 1399 1400 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1401 if (!sub_schurs->reuse_mumps) { 1402 IS is_aux1,is_aux2; 1403 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1404 1405 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1406 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1407 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1408 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1409 for (i=0; i<n_D; i++) { 1410 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1411 } 1412 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1413 for (i=0, j=0; i<n_R; i++) { 1414 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1415 aux_array1[j++] = i; 1416 } 1417 } 1418 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1419 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1420 for (i=0, j=0; i<n_B; i++) { 1421 if (!PetscBTLookup(bitmask,is_indices[i])) { 1422 aux_array2[j++] = i; 1423 } 1424 } 1425 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1426 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1427 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1428 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1429 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1430 1431 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1432 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1433 for (i=0, j=0; i<n_R; i++) { 1434 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1435 aux_array1[j++] = i; 1436 } 1437 } 1438 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1439 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1440 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1441 } 1442 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1443 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1444 } else { 1445 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1446 IS tis; 1447 PetscInt schur_size; 1448 1449 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1450 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1451 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1452 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1453 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1454 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1455 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1456 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1457 } 1458 } 1459 PetscFunctionReturn(0); 1460 } 1461 1462 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1465 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1466 { 1467 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1468 PC_IS *pcis = (PC_IS*)pc->data; 1469 PC pc_temp; 1470 Mat A_RR; 1471 MatReuse reuse; 1472 PetscScalar m_one = -1.0; 1473 PetscReal value; 1474 PetscInt n_D,n_R; 1475 PetscBool use_exact,use_exact_reduced,issbaij; 1476 PetscErrorCode ierr; 1477 /* prefixes stuff */ 1478 char dir_prefix[256],neu_prefix[256],str_level[16]; 1479 size_t len; 1480 1481 PetscFunctionBegin; 1482 1483 /* compute prefixes */ 1484 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1485 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1486 if (!pcbddc->current_level) { 1487 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1488 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1489 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1490 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1491 } else { 1492 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1493 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1494 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1495 len -= 15; /* remove "pc_bddc_coarse_" */ 1496 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1497 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1498 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1499 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1500 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1501 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1502 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1503 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1504 } 1505 1506 /* DIRICHLET PROBLEM */ 1507 if (dirichlet) { 1508 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1509 if (pcbddc->local_mat->symmetric_set) { 1510 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1511 } 1512 /* Matrix for Dirichlet problem is pcis->A_II */ 1513 n_D = pcis->n - pcis->n_B; 1514 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1515 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1516 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1517 /* default */ 1518 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1519 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1521 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1522 if (issbaij) { 1523 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1524 } else { 1525 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1526 } 1527 /* Allow user's customization */ 1528 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1529 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1530 } 1531 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1532 if (sub_schurs->reuse_mumps) { 1533 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1534 1535 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1536 } 1537 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1538 if (!n_D) { 1539 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1540 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1541 } 1542 /* Set Up KSP for Dirichlet problem of BDDC */ 1543 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1544 /* set ksp_D into pcis data */ 1545 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1546 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1547 pcis->ksp_D = pcbddc->ksp_D; 1548 } 1549 1550 /* NEUMANN PROBLEM */ 1551 A_RR = 0; 1552 if (neumann) { 1553 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1554 PetscInt ibs,mbs; 1555 PetscBool issbaij; 1556 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1557 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1558 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1559 if (pcbddc->ksp_R) { /* already created ksp */ 1560 PetscInt nn_R; 1561 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1562 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1563 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1564 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1565 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1566 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1567 reuse = MAT_INITIAL_MATRIX; 1568 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1569 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1570 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1571 reuse = MAT_INITIAL_MATRIX; 1572 } else { /* safe to reuse the matrix */ 1573 reuse = MAT_REUSE_MATRIX; 1574 } 1575 } 1576 /* last check */ 1577 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1578 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1579 reuse = MAT_INITIAL_MATRIX; 1580 } 1581 } else { /* first time, so we need to create the matrix */ 1582 reuse = MAT_INITIAL_MATRIX; 1583 } 1584 /* extract A_RR */ 1585 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1586 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1587 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1588 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1589 if (matis->A == pcbddc->local_mat) { 1590 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1591 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1592 } else { 1593 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1594 } 1595 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1596 if (matis->A == pcbddc->local_mat) { 1597 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1598 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1599 } else { 1600 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1601 } 1602 } 1603 if (!sub_schurs->reuse_mumps) { 1604 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1605 if (pcbddc->local_mat->symmetric_set) { 1606 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1607 } 1608 } else { 1609 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1610 1611 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1612 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1613 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1614 } 1615 if (!pcbddc->ksp_R) { /* create object if not present */ 1616 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1617 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1618 /* default */ 1619 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1620 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1621 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1622 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1623 if (issbaij) { 1624 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1625 } else { 1626 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1627 } 1628 /* Allow user's customization */ 1629 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1630 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1631 } 1632 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1633 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1634 if (!n_R) { 1635 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1636 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1637 } 1638 /* Reuse MUMPS solver if it is present */ 1639 if (sub_schurs->reuse_mumps) { 1640 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1641 1642 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1643 } 1644 /* Set Up KSP for Neumann problem of BDDC */ 1645 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1646 } 1647 /* free Neumann problem's matrix */ 1648 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1649 1650 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1651 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1652 if (pcbddc->dbg_flag) { 1653 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1654 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1655 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1656 } 1657 if (dirichlet) { /* Dirichlet */ 1658 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1659 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1660 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1661 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1662 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1663 /* need to be adapted? */ 1664 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1665 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1666 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1667 /* print info */ 1668 if (pcbddc->dbg_flag) { 1669 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1670 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1671 } 1672 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1673 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1674 } 1675 } 1676 if (neumann) { /* Neumann */ 1677 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1678 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1679 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1680 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1681 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1682 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1683 /* need to be adapted? */ 1684 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1685 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1686 /* print info */ 1687 if (pcbddc->dbg_flag) { 1688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1689 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1690 } 1691 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1692 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1693 } 1694 } 1695 } 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1701 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1702 { 1703 PetscErrorCode ierr; 1704 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1705 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1706 1707 PetscFunctionBegin; 1708 if (!sub_schurs->reuse_mumps) { 1709 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1710 } 1711 if (!pcbddc->switch_static) { 1712 if (applytranspose && pcbddc->local_auxmat1) { 1713 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1714 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1715 } 1716 if (!sub_schurs->reuse_mumps) { 1717 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1718 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1719 } else { 1720 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1721 1722 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1723 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1724 } 1725 } else { 1726 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1727 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1728 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1729 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1730 if (applytranspose && pcbddc->local_auxmat1) { 1731 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1732 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1733 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1734 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1735 } 1736 } 1737 if (!sub_schurs->reuse_mumps) { 1738 if (applytranspose) { 1739 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1740 } else { 1741 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1742 } 1743 } else { 1744 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1745 1746 if (applytranspose) { 1747 ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1748 } else { 1749 ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1750 } 1751 } 1752 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1753 if (!pcbddc->switch_static) { 1754 if (!sub_schurs->reuse_mumps) { 1755 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1756 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1757 } else { 1758 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1759 1760 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1761 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1762 } 1763 if (!applytranspose && pcbddc->local_auxmat1) { 1764 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1765 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1766 } 1767 } else { 1768 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1769 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1770 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1771 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1772 if (!applytranspose && pcbddc->local_auxmat1) { 1773 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1774 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1775 } 1776 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1778 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 } 1781 PetscFunctionReturn(0); 1782 } 1783 1784 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1787 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1788 { 1789 PetscErrorCode ierr; 1790 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1791 PC_IS* pcis = (PC_IS*) (pc->data); 1792 const PetscScalar zero = 0.0; 1793 1794 PetscFunctionBegin; 1795 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1796 if (applytranspose) { 1797 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1798 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1799 } else { 1800 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1801 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1802 } 1803 /* start communications from local primal nodes to rhs of coarse solver */ 1804 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1805 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1806 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1807 1808 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1809 /* TODO remove null space when doing multilevel */ 1810 if (pcbddc->coarse_ksp) { 1811 Vec rhs,sol; 1812 1813 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1814 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1815 if (applytranspose) { 1816 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1817 } else { 1818 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1819 } 1820 } 1821 1822 /* Local solution on R nodes */ 1823 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1824 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1825 } 1826 1827 /* communications from coarse sol to local primal nodes */ 1828 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1829 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1830 1831 /* Sum contributions from two levels */ 1832 if (applytranspose) { 1833 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1834 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1835 } else { 1836 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1837 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1838 } 1839 PetscFunctionReturn(0); 1840 } 1841 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1844 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1845 { 1846 PetscErrorCode ierr; 1847 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1848 PetscScalar *array; 1849 Vec from,to; 1850 1851 PetscFunctionBegin; 1852 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1853 from = pcbddc->coarse_vec; 1854 to = pcbddc->vec1_P; 1855 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1856 Vec tvec; 1857 1858 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1859 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1860 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1861 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1862 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1863 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1864 } 1865 } else { /* from local to global -> put data in coarse right hand side */ 1866 from = pcbddc->vec1_P; 1867 to = pcbddc->coarse_vec; 1868 } 1869 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1870 PetscFunctionReturn(0); 1871 } 1872 1873 #undef __FUNCT__ 1874 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1875 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1876 { 1877 PetscErrorCode ierr; 1878 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1879 PetscScalar *array; 1880 Vec from,to; 1881 1882 PetscFunctionBegin; 1883 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1884 from = pcbddc->coarse_vec; 1885 to = pcbddc->vec1_P; 1886 } else { /* from local to global -> put data in coarse right hand side */ 1887 from = pcbddc->vec1_P; 1888 to = pcbddc->coarse_vec; 1889 } 1890 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1891 if (smode == SCATTER_FORWARD) { 1892 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1893 Vec tvec; 1894 1895 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1896 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1897 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1898 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1899 } 1900 } else { 1901 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1902 ierr = VecResetArray(from);CHKERRQ(ierr); 1903 } 1904 } 1905 PetscFunctionReturn(0); 1906 } 1907 1908 /* uncomment for testing purposes */ 1909 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1912 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1913 { 1914 PetscErrorCode ierr; 1915 PC_IS* pcis = (PC_IS*)(pc->data); 1916 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1917 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1918 /* one and zero */ 1919 PetscScalar one=1.0,zero=0.0; 1920 /* space to store constraints and their local indices */ 1921 PetscScalar *constraints_data; 1922 PetscInt *constraints_idxs,*constraints_idxs_B; 1923 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1924 PetscInt *constraints_n; 1925 /* iterators */ 1926 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1927 /* BLAS integers */ 1928 PetscBLASInt lwork,lierr; 1929 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1930 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1931 /* reuse */ 1932 PetscInt olocal_primal_size,olocal_primal_size_cc; 1933 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1934 /* change of basis */ 1935 PetscBool qr_needed; 1936 PetscBT change_basis,qr_needed_idx; 1937 /* auxiliary stuff */ 1938 PetscInt *nnz,*is_indices; 1939 PetscInt ncc; 1940 /* some quantities */ 1941 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1942 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1943 1944 PetscFunctionBegin; 1945 /* Destroy Mat objects computed previously */ 1946 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1947 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1948 /* save info on constraints from previous setup (if any) */ 1949 olocal_primal_size = pcbddc->local_primal_size; 1950 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1951 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1952 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1953 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1954 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1955 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1956 1957 /* print some info */ 1958 if (pcbddc->dbg_flag) { 1959 IS vertices; 1960 PetscInt nv,nedges,nfaces; 1961 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1962 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1963 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1964 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1965 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1966 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1967 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1968 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1969 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1970 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1971 } 1972 1973 if (!pcbddc->adaptive_selection) { 1974 IS ISForVertices,*ISForFaces,*ISForEdges; 1975 MatNullSpace nearnullsp; 1976 const Vec *nearnullvecs; 1977 Vec *localnearnullsp; 1978 PetscScalar *array; 1979 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1980 PetscBool nnsp_has_cnst; 1981 /* LAPACK working arrays for SVD or POD */ 1982 PetscBool skip_lapack,boolforchange; 1983 PetscScalar *work; 1984 PetscReal *singular_vals; 1985 #if defined(PETSC_USE_COMPLEX) 1986 PetscReal *rwork; 1987 #endif 1988 #if defined(PETSC_MISSING_LAPACK_GESVD) 1989 PetscScalar *temp_basis,*correlation_mat; 1990 #else 1991 PetscBLASInt dummy_int=1; 1992 PetscScalar dummy_scalar=1.; 1993 #endif 1994 1995 /* Get index sets for faces, edges and vertices from graph */ 1996 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1997 /* free unneeded index sets */ 1998 if (!pcbddc->use_vertices) { 1999 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2000 } 2001 if (!pcbddc->use_edges) { 2002 for (i=0;i<n_ISForEdges;i++) { 2003 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2004 } 2005 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2006 n_ISForEdges = 0; 2007 } 2008 if (!pcbddc->use_faces) { 2009 for (i=0;i<n_ISForFaces;i++) { 2010 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2011 } 2012 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2013 n_ISForFaces = 0; 2014 } 2015 2016 #if defined(PETSC_USE_DEBUG) 2017 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2018 Also use_change_of_basis should be consistent among processors */ 2019 if (pcbddc->NullSpace) { 2020 PetscBool tbool[2],gbool[2]; 2021 2022 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2023 pcbddc->use_change_of_basis = PETSC_TRUE; 2024 if (!ISForEdges) { 2025 pcbddc->use_change_on_faces = PETSC_TRUE; 2026 } 2027 } 2028 tbool[0] = pcbddc->use_change_of_basis; 2029 tbool[1] = pcbddc->use_change_on_faces; 2030 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2031 pcbddc->use_change_of_basis = gbool[0]; 2032 pcbddc->use_change_on_faces = gbool[1]; 2033 } 2034 #endif 2035 2036 /* check if near null space is attached to global mat */ 2037 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2038 if (nearnullsp) { 2039 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2040 /* remove any stored info */ 2041 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2042 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2043 /* store information for BDDC solver reuse */ 2044 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2045 pcbddc->onearnullspace = nearnullsp; 2046 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2047 for (i=0;i<nnsp_size;i++) { 2048 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2049 } 2050 } else { /* if near null space is not provided BDDC uses constants by default */ 2051 nnsp_size = 0; 2052 nnsp_has_cnst = PETSC_TRUE; 2053 } 2054 /* get max number of constraints on a single cc */ 2055 max_constraints = nnsp_size; 2056 if (nnsp_has_cnst) max_constraints++; 2057 2058 /* 2059 Evaluate maximum storage size needed by the procedure 2060 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2061 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2062 There can be multiple constraints per connected component 2063 */ 2064 n_vertices = 0; 2065 if (ISForVertices) { 2066 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2067 } 2068 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2069 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2070 2071 total_counts = n_ISForFaces+n_ISForEdges; 2072 total_counts *= max_constraints; 2073 total_counts += n_vertices; 2074 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2075 2076 total_counts = 0; 2077 max_size_of_constraint = 0; 2078 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2079 IS used_is; 2080 if (i<n_ISForEdges) { 2081 used_is = ISForEdges[i]; 2082 } else { 2083 used_is = ISForFaces[i-n_ISForEdges]; 2084 } 2085 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2086 total_counts += j; 2087 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2088 } 2089 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2090 2091 /* get local part of global near null space vectors */ 2092 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2093 for (k=0;k<nnsp_size;k++) { 2094 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2095 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2096 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2097 } 2098 2099 /* whether or not to skip lapack calls */ 2100 skip_lapack = PETSC_TRUE; 2101 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2102 2103 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2104 if (!skip_lapack) { 2105 PetscScalar temp_work; 2106 2107 #if defined(PETSC_MISSING_LAPACK_GESVD) 2108 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2109 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2110 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2111 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2112 #if defined(PETSC_USE_COMPLEX) 2113 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2114 #endif 2115 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2116 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2117 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2118 lwork = -1; 2119 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2120 #if !defined(PETSC_USE_COMPLEX) 2121 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2122 #else 2123 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2124 #endif 2125 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2126 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2127 #else /* on missing GESVD */ 2128 /* SVD */ 2129 PetscInt max_n,min_n; 2130 max_n = max_size_of_constraint; 2131 min_n = max_constraints; 2132 if (max_size_of_constraint < max_constraints) { 2133 min_n = max_size_of_constraint; 2134 max_n = max_constraints; 2135 } 2136 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2137 #if defined(PETSC_USE_COMPLEX) 2138 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2139 #endif 2140 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2141 lwork = -1; 2142 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2143 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2144 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2145 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2146 #if !defined(PETSC_USE_COMPLEX) 2147 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2148 #else 2149 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2150 #endif 2151 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2152 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2153 #endif /* on missing GESVD */ 2154 /* Allocate optimal workspace */ 2155 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2156 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2157 } 2158 /* Now we can loop on constraining sets */ 2159 total_counts = 0; 2160 constraints_idxs_ptr[0] = 0; 2161 constraints_data_ptr[0] = 0; 2162 /* vertices */ 2163 if (n_vertices) { 2164 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2165 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2166 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2167 for (i=0;i<n_vertices;i++) { 2168 constraints_n[total_counts] = 1; 2169 constraints_data[total_counts] = 1.0; 2170 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2171 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2172 total_counts++; 2173 } 2174 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2175 PetscBool used_vertex; 2176 for (i=0;i<n_vertices;i++) { 2177 used_vertex = PETSC_FALSE; 2178 k = 0; 2179 while (!used_vertex && k<nnsp_size) { 2180 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2181 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2182 constraints_n[total_counts] = 1; 2183 constraints_idxs[total_counts] = is_indices[i]; 2184 constraints_data[total_counts] = 1.0; 2185 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2186 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2187 total_counts++; 2188 used_vertex = PETSC_TRUE; 2189 } 2190 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2191 k++; 2192 } 2193 } 2194 } 2195 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2196 n_vertices = total_counts; 2197 } 2198 2199 /* edges and faces */ 2200 total_counts_cc = total_counts; 2201 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2202 IS used_is; 2203 PetscBool idxs_copied = PETSC_FALSE; 2204 2205 if (ncc<n_ISForEdges) { 2206 used_is = ISForEdges[ncc]; 2207 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2208 } else { 2209 used_is = ISForFaces[ncc-n_ISForEdges]; 2210 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2211 } 2212 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2213 2214 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2215 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2216 /* change of basis should not be performed on local periodic nodes */ 2217 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2218 if (nnsp_has_cnst) { 2219 PetscScalar quad_value; 2220 2221 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2222 idxs_copied = PETSC_TRUE; 2223 2224 if (!pcbddc->use_nnsp_true) { 2225 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2226 } else { 2227 quad_value = 1.0; 2228 } 2229 for (j=0;j<size_of_constraint;j++) { 2230 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2231 } 2232 temp_constraints++; 2233 total_counts++; 2234 } 2235 for (k=0;k<nnsp_size;k++) { 2236 PetscReal real_value; 2237 PetscScalar *ptr_to_data; 2238 2239 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2240 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2241 for (j=0;j<size_of_constraint;j++) { 2242 ptr_to_data[j] = array[is_indices[j]]; 2243 } 2244 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2245 /* check if array is null on the connected component */ 2246 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2247 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2248 if (real_value > 0.0) { /* keep indices and values */ 2249 temp_constraints++; 2250 total_counts++; 2251 if (!idxs_copied) { 2252 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2253 idxs_copied = PETSC_TRUE; 2254 } 2255 } 2256 } 2257 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2258 valid_constraints = temp_constraints; 2259 if (!pcbddc->use_nnsp_true && temp_constraints) { 2260 if (temp_constraints == 1) { /* just normalize the constraint */ 2261 PetscScalar norm,*ptr_to_data; 2262 2263 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2264 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2265 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2266 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2267 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2268 } else { /* perform SVD */ 2269 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2270 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2271 2272 #if defined(PETSC_MISSING_LAPACK_GESVD) 2273 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2274 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2275 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2276 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2277 from that computed using LAPACKgesvd 2278 -> This is due to a different computation of eigenvectors in LAPACKheev 2279 -> The quality of the POD-computed basis will be the same */ 2280 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2281 /* Store upper triangular part of correlation matrix */ 2282 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2283 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2284 for (j=0;j<temp_constraints;j++) { 2285 for (k=0;k<j+1;k++) { 2286 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2287 } 2288 } 2289 /* compute eigenvalues and eigenvectors of correlation matrix */ 2290 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2291 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2292 #if !defined(PETSC_USE_COMPLEX) 2293 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2294 #else 2295 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2296 #endif 2297 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2298 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2299 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2300 j = 0; 2301 while (j < temp_constraints && singular_vals[j] < tol) j++; 2302 total_counts = total_counts-j; 2303 valid_constraints = temp_constraints-j; 2304 /* scale and copy POD basis into used quadrature memory */ 2305 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2306 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2307 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2308 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2309 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2310 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2311 if (j<temp_constraints) { 2312 PetscInt ii; 2313 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2314 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2315 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2316 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2317 for (k=0;k<temp_constraints-j;k++) { 2318 for (ii=0;ii<size_of_constraint;ii++) { 2319 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2320 } 2321 } 2322 } 2323 #else /* on missing GESVD */ 2324 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2325 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2326 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2327 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2328 #if !defined(PETSC_USE_COMPLEX) 2329 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2330 #else 2331 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2332 #endif 2333 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2334 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2335 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2336 k = temp_constraints; 2337 if (k > size_of_constraint) k = size_of_constraint; 2338 j = 0; 2339 while (j < k && singular_vals[k-j-1] < tol) j++; 2340 valid_constraints = k-j; 2341 total_counts = total_counts-temp_constraints+valid_constraints; 2342 #endif /* on missing GESVD */ 2343 } 2344 } 2345 /* update pointers information */ 2346 if (valid_constraints) { 2347 constraints_n[total_counts_cc] = valid_constraints; 2348 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2349 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2350 /* set change_of_basis flag */ 2351 if (boolforchange) { 2352 PetscBTSet(change_basis,total_counts_cc); 2353 } 2354 total_counts_cc++; 2355 } 2356 } 2357 /* free workspace */ 2358 if (!skip_lapack) { 2359 ierr = PetscFree(work);CHKERRQ(ierr); 2360 #if defined(PETSC_USE_COMPLEX) 2361 ierr = PetscFree(rwork);CHKERRQ(ierr); 2362 #endif 2363 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2364 #if defined(PETSC_MISSING_LAPACK_GESVD) 2365 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2366 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2367 #endif 2368 } 2369 for (k=0;k<nnsp_size;k++) { 2370 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2371 } 2372 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2373 /* free index sets of faces, edges and vertices */ 2374 for (i=0;i<n_ISForFaces;i++) { 2375 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2376 } 2377 if (n_ISForFaces) { 2378 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2379 } 2380 for (i=0;i<n_ISForEdges;i++) { 2381 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2382 } 2383 if (n_ISForEdges) { 2384 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2385 } 2386 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2387 } else { 2388 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2389 2390 total_counts = 0; 2391 n_vertices = 0; 2392 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2393 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2394 } 2395 max_constraints = 0; 2396 total_counts_cc = 0; 2397 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2398 total_counts += pcbddc->adaptive_constraints_n[i]; 2399 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2400 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2401 } 2402 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2403 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2404 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2405 constraints_data = pcbddc->adaptive_constraints_data; 2406 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2407 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2408 total_counts_cc = 0; 2409 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2410 if (pcbddc->adaptive_constraints_n[i]) { 2411 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2412 } 2413 } 2414 #if 0 2415 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2416 for (i=0;i<total_counts_cc;i++) { 2417 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2418 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2419 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2420 printf(" %d",constraints_idxs[j]); 2421 } 2422 printf("\n"); 2423 printf("number of cc: %d\n",constraints_n[i]); 2424 } 2425 for (i=0;i<n_vertices;i++) { 2426 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2427 } 2428 for (i=0;i<sub_schurs->n_subs;i++) { 2429 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2430 } 2431 #endif 2432 2433 max_size_of_constraint = 0; 2434 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2435 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2436 /* Change of basis */ 2437 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2438 if (pcbddc->use_change_of_basis) { 2439 for (i=0;i<sub_schurs->n_subs;i++) { 2440 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2441 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2442 } 2443 } 2444 } 2445 } 2446 pcbddc->local_primal_size = total_counts; 2447 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2448 2449 /* map constraints_idxs in boundary numbering */ 2450 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2451 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 2452 2453 /* Create constraint matrix */ 2454 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2455 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2456 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2457 2458 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2459 /* determine if a QR strategy is needed for change of basis */ 2460 qr_needed = PETSC_FALSE; 2461 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2462 total_primal_vertices=0; 2463 pcbddc->local_primal_size_cc = 0; 2464 for (i=0;i<total_counts_cc;i++) { 2465 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2466 if (size_of_constraint == 1) { 2467 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2468 pcbddc->local_primal_size_cc += 1; 2469 } else if (PetscBTLookup(change_basis,i)) { 2470 for (k=0;k<constraints_n[i];k++) { 2471 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2472 } 2473 pcbddc->local_primal_size_cc += constraints_n[i]; 2474 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2475 PetscBTSet(qr_needed_idx,i); 2476 qr_needed = PETSC_TRUE; 2477 } 2478 } else { 2479 pcbddc->local_primal_size_cc += 1; 2480 } 2481 } 2482 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2483 pcbddc->n_vertices = total_primal_vertices; 2484 /* permute indices in order to have a sorted set of vertices */ 2485 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2486 2487 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2488 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2489 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2490 2491 /* nonzero structure of constraint matrix */ 2492 /* and get reference dof for local constraints */ 2493 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2494 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2495 2496 j = total_primal_vertices; 2497 total_counts = total_primal_vertices; 2498 cum = total_primal_vertices; 2499 for (i=n_vertices;i<total_counts_cc;i++) { 2500 if (!PetscBTLookup(change_basis,i)) { 2501 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2502 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2503 cum++; 2504 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2505 for (k=0;k<constraints_n[i];k++) { 2506 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2507 nnz[j+k] = size_of_constraint; 2508 } 2509 j += constraints_n[i]; 2510 } 2511 } 2512 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2513 ierr = PetscFree(nnz);CHKERRQ(ierr); 2514 2515 /* set values in constraint matrix */ 2516 for (i=0;i<total_primal_vertices;i++) { 2517 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2518 } 2519 total_counts = total_primal_vertices; 2520 for (i=n_vertices;i<total_counts_cc;i++) { 2521 if (!PetscBTLookup(change_basis,i)) { 2522 PetscInt *cols; 2523 2524 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2525 cols = constraints_idxs+constraints_idxs_ptr[i]; 2526 for (k=0;k<constraints_n[i];k++) { 2527 PetscInt row = total_counts+k; 2528 PetscScalar *vals; 2529 2530 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2531 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2532 } 2533 total_counts += constraints_n[i]; 2534 } 2535 } 2536 /* assembling */ 2537 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2538 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2539 2540 /* 2541 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2542 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2543 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 2544 */ 2545 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2546 if (pcbddc->use_change_of_basis) { 2547 /* dual and primal dofs on a single cc */ 2548 PetscInt dual_dofs,primal_dofs; 2549 /* working stuff for GEQRF */ 2550 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2551 PetscBLASInt lqr_work; 2552 /* working stuff for UNGQR */ 2553 PetscScalar *gqr_work,lgqr_work_t; 2554 PetscBLASInt lgqr_work; 2555 /* working stuff for TRTRS */ 2556 PetscScalar *trs_rhs; 2557 PetscBLASInt Blas_NRHS; 2558 /* pointers for values insertion into change of basis matrix */ 2559 PetscInt *start_rows,*start_cols; 2560 PetscScalar *start_vals; 2561 /* working stuff for values insertion */ 2562 PetscBT is_primal; 2563 PetscInt *aux_primal_numbering_B; 2564 /* matrix sizes */ 2565 PetscInt global_size,local_size; 2566 /* temporary change of basis */ 2567 Mat localChangeOfBasisMatrix; 2568 /* extra space for debugging */ 2569 PetscScalar *dbg_work; 2570 2571 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2572 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2573 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2574 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2575 /* nonzeros for local mat */ 2576 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2577 for (i=0;i<pcis->n;i++) nnz[i]=1; 2578 for (i=n_vertices;i<total_counts_cc;i++) { 2579 if (PetscBTLookup(change_basis,i)) { 2580 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2581 if (PetscBTLookup(qr_needed_idx,i)) { 2582 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2583 } else { 2584 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2585 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2586 } 2587 } 2588 } 2589 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2590 ierr = PetscFree(nnz);CHKERRQ(ierr); 2591 /* Set initial identity in the matrix */ 2592 for (i=0;i<pcis->n;i++) { 2593 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2594 } 2595 2596 if (pcbddc->dbg_flag) { 2597 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2598 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2599 } 2600 2601 2602 /* Now we loop on the constraints which need a change of basis */ 2603 /* 2604 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2605 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2606 2607 Basic blocks of change of basis matrix T computed by 2608 2609 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2610 2611 | 1 0 ... 0 s_1/S | 2612 | 0 1 ... 0 s_2/S | 2613 | ... | 2614 | 0 ... 1 s_{n-1}/S | 2615 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2616 2617 with S = \sum_{i=1}^n s_i^2 2618 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2619 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2620 2621 - QR decomposition of constraints otherwise 2622 */ 2623 if (qr_needed) { 2624 /* space to store Q */ 2625 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2626 /* first we issue queries for optimal work */ 2627 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2628 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2629 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2630 lqr_work = -1; 2631 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2632 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2633 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2634 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2635 lgqr_work = -1; 2636 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2637 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2638 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2639 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2640 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2641 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2642 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2643 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2644 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2645 /* array to store scaling factors for reflectors */ 2646 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2647 /* array to store rhs and solution of triangular solver */ 2648 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2649 /* allocating workspace for check */ 2650 if (pcbddc->dbg_flag) { 2651 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2652 } 2653 } 2654 /* array to store whether a node is primal or not */ 2655 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2656 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2657 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2658 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 2659 for (i=0;i<total_primal_vertices;i++) { 2660 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2661 } 2662 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2663 2664 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2665 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2666 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2667 if (PetscBTLookup(change_basis,total_counts)) { 2668 /* get constraint info */ 2669 primal_dofs = constraints_n[total_counts]; 2670 dual_dofs = size_of_constraint-primal_dofs; 2671 2672 if (pcbddc->dbg_flag) { 2673 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2674 } 2675 2676 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2677 2678 /* copy quadrature constraints for change of basis check */ 2679 if (pcbddc->dbg_flag) { 2680 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2681 } 2682 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2683 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2684 2685 /* compute QR decomposition of constraints */ 2686 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2687 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2688 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2689 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2690 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2691 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2692 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2693 2694 /* explictly compute R^-T */ 2695 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2696 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2697 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2698 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2699 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2700 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2701 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2702 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2703 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2704 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2705 2706 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2707 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2708 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2709 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2710 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2711 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2712 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2713 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2714 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2715 2716 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2717 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2718 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2719 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2720 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2721 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2722 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2723 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2725 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2726 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2727 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2728 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2729 2730 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2731 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2732 /* insert cols for primal dofs */ 2733 for (j=0;j<primal_dofs;j++) { 2734 start_vals = &qr_basis[j*size_of_constraint]; 2735 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2736 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2737 } 2738 /* insert cols for dual dofs */ 2739 for (j=0,k=0;j<dual_dofs;k++) { 2740 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2741 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2742 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2743 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2744 j++; 2745 } 2746 } 2747 2748 /* check change of basis */ 2749 if (pcbddc->dbg_flag) { 2750 PetscInt ii,jj; 2751 PetscBool valid_qr=PETSC_TRUE; 2752 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2753 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2754 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2755 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2756 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2757 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2758 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2759 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2760 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2761 for (jj=0;jj<size_of_constraint;jj++) { 2762 for (ii=0;ii<primal_dofs;ii++) { 2763 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2764 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2765 } 2766 } 2767 if (!valid_qr) { 2768 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2769 for (jj=0;jj<size_of_constraint;jj++) { 2770 for (ii=0;ii<primal_dofs;ii++) { 2771 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2772 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2773 } 2774 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2775 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2776 } 2777 } 2778 } 2779 } else { 2780 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2781 } 2782 } 2783 } else { /* simple transformation block */ 2784 PetscInt row,col; 2785 PetscScalar val,norm; 2786 2787 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2788 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2789 for (j=0;j<size_of_constraint;j++) { 2790 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2791 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2792 if (!PetscBTLookup(is_primal,row_B)) { 2793 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2794 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2795 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2796 } else { 2797 for (k=0;k<size_of_constraint;k++) { 2798 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2799 if (row != col) { 2800 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2801 } else { 2802 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2803 } 2804 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2805 } 2806 } 2807 } 2808 if (pcbddc->dbg_flag) { 2809 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2810 } 2811 } 2812 } else { 2813 if (pcbddc->dbg_flag) { 2814 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2815 } 2816 } 2817 } 2818 2819 /* free workspace */ 2820 if (qr_needed) { 2821 if (pcbddc->dbg_flag) { 2822 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2823 } 2824 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2825 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2826 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2827 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2828 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2829 } 2830 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2831 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2832 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2833 2834 /* assembling of global change of variable */ 2835 { 2836 Mat tmat; 2837 PetscInt bs; 2838 2839 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2840 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2841 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2842 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2843 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2844 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2845 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2846 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2847 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2848 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2849 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2850 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2851 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2852 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2853 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2854 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2855 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2856 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2857 } 2858 /* check */ 2859 if (pcbddc->dbg_flag) { 2860 PetscReal error; 2861 Vec x,x_change; 2862 2863 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2864 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2865 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2866 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2867 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2868 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2869 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2870 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2871 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2872 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2873 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2874 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2875 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2876 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2877 ierr = VecDestroy(&x);CHKERRQ(ierr); 2878 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2879 } 2880 2881 /* adapt sub_schurs computed (if any) */ 2882 if (pcbddc->use_deluxe_scaling) { 2883 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2884 if (sub_schurs->S_Ej_all) { 2885 Mat S_new,tmat; 2886 IS is_all_N; 2887 2888 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2889 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2890 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2891 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2892 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2893 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2894 sub_schurs->S_Ej_all = S_new; 2895 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2896 if (sub_schurs->sum_S_Ej_all) { 2897 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2898 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2899 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2900 sub_schurs->sum_S_Ej_all = S_new; 2901 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2902 } 2903 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2904 } 2905 } 2906 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2907 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2908 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2909 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2910 } 2911 2912 /* set up change of basis context */ 2913 if (pcbddc->ChangeOfBasisMatrix) { 2914 PCBDDCChange_ctx change_ctx; 2915 2916 if (!pcbddc->new_global_mat) { 2917 PetscInt global_size,local_size; 2918 2919 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2920 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2921 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2922 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2923 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2924 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2925 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2926 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2927 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2928 } else { 2929 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2930 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2931 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2932 } 2933 if (!pcbddc->user_ChangeOfBasisMatrix) { 2934 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2935 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2936 } else { 2937 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2938 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2939 } 2940 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2941 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2942 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2943 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2944 } 2945 2946 /* check if a new primal space has been introduced */ 2947 pcbddc->new_primal_space_local = PETSC_TRUE; 2948 if (olocal_primal_size == pcbddc->local_primal_size) { 2949 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2950 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2951 if (!pcbddc->new_primal_space_local) { 2952 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2953 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2954 } 2955 } 2956 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2957 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2958 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2959 2960 /* flush dbg viewer */ 2961 if (pcbddc->dbg_flag) { 2962 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2963 } 2964 2965 /* free workspace */ 2966 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2967 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2968 if (!pcbddc->adaptive_selection) { 2969 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2970 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2971 } else { 2972 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2973 pcbddc->adaptive_constraints_idxs_ptr, 2974 pcbddc->adaptive_constraints_data_ptr, 2975 pcbddc->adaptive_constraints_idxs, 2976 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2977 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2978 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2979 } 2980 PetscFunctionReturn(0); 2981 } 2982 2983 #undef __FUNCT__ 2984 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2985 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2986 { 2987 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2988 PC_IS *pcis = (PC_IS*)pc->data; 2989 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2990 PetscInt ierr,i,vertex_size,N; 2991 PetscViewer viewer=pcbddc->dbg_viewer; 2992 2993 PetscFunctionBegin; 2994 /* Reset previously computed graph */ 2995 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2996 /* Init local Graph struct */ 2997 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2998 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 2999 3000 /* Check validity of the csr graph passed in by the user */ 3001 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3002 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3003 } 3004 3005 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3006 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3007 PetscInt *xadj,*adjncy; 3008 PetscInt nvtxs; 3009 PetscBool flg_row=PETSC_FALSE; 3010 3011 if (pcbddc->use_local_adj) { 3012 3013 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3014 if (flg_row) { 3015 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3016 pcbddc->computed_rowadj = PETSC_TRUE; 3017 } 3018 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3019 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3020 IS is_dummy; 3021 ISLocalToGlobalMapping l2gmap_dummy; 3022 PetscInt j,sum; 3023 PetscInt *cxadj,*cadjncy; 3024 const PetscInt *idxs; 3025 PCBDDCGraph graph; 3026 PetscBT is_on_boundary; 3027 3028 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3029 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3030 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3031 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3032 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3033 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3034 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3035 if (flg_row) { 3036 graph->xadj = xadj; 3037 graph->adjncy = adjncy; 3038 } 3039 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3040 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3041 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3042 3043 if (pcbddc->dbg_flag) { 3044 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3045 for (i=0;i<graph->ncc;i++) { 3046 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3047 } 3048 } 3049 3050 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3051 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3052 for (i=0;i<pcis->n_B;i++) { 3053 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3054 } 3055 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3056 3057 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3058 sum = 0; 3059 for (i=0;i<graph->ncc;i++) { 3060 PetscInt sizecc = 0; 3061 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3062 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3063 sizecc++; 3064 } 3065 } 3066 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3067 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3068 cxadj[graph->queue[j]] = sizecc; 3069 } 3070 } 3071 sum += sizecc*sizecc; 3072 } 3073 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3074 sum = 0; 3075 for (i=0;i<pcis->n;i++) { 3076 PetscInt temp = cxadj[i]; 3077 cxadj[i] = sum; 3078 sum += temp; 3079 } 3080 cxadj[pcis->n] = sum; 3081 for (i=0;i<graph->ncc;i++) { 3082 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3083 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3084 PetscInt k,sizecc = 0; 3085 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3086 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3087 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3088 sizecc++; 3089 } 3090 } 3091 } 3092 } 3093 } 3094 if (sum) { 3095 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3096 } else { 3097 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3098 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3099 } 3100 graph->xadj = 0; 3101 graph->adjncy = 0; 3102 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3103 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3104 } 3105 } 3106 if (pcbddc->dbg_flag) { 3107 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3108 } 3109 3110 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3111 vertex_size = 1; 3112 if (pcbddc->user_provided_isfordofs) { 3113 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3114 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3115 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3116 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3117 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3118 } 3119 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3120 pcbddc->n_ISForDofs = 0; 3121 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3122 } 3123 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3124 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3125 } else { 3126 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3127 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3128 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3129 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3130 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3131 } 3132 } 3133 } 3134 3135 /* Setup of Graph */ 3136 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3137 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3138 } 3139 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3140 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3141 } 3142 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3143 3144 /* Graph's connected components analysis */ 3145 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3146 3147 /* print some info to stdout */ 3148 if (pcbddc->dbg_flag) { 3149 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3150 } 3151 3152 /* mark topography has done */ 3153 pcbddc->recompute_topography = PETSC_FALSE; 3154 PetscFunctionReturn(0); 3155 } 3156 3157 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3158 #undef __FUNCT__ 3159 #define __FUNCT__ "PCBDDCSubsetNumbering" 3160 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3161 { 3162 PetscSF sf; 3163 PetscLayout map; 3164 const PetscInt *idxs; 3165 PetscInt *leaf_data,*root_data,*gidxs; 3166 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3167 PetscInt n_n,nlocals,start,first_index; 3168 PetscMPIInt commsize; 3169 PetscBool first_found; 3170 PetscErrorCode ierr; 3171 3172 PetscFunctionBegin; 3173 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3174 if (subset_mult) { 3175 PetscCheckSameComm(subset,1,subset_mult,2); 3176 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3177 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3178 } 3179 /* create workspace layout for computing global indices of subset */ 3180 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3181 lbounds[0] = lbounds[1] = 0; 3182 for (i=0;i<n;i++) { 3183 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3184 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3185 } 3186 lbounds[0] = -lbounds[0]; 3187 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3188 gbounds[0] = -gbounds[0]; 3189 N = gbounds[1] - gbounds[0] + 1; 3190 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3191 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3192 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3193 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3194 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3195 3196 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3197 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3198 if (subset_mult) { 3199 const PetscInt* idxs_mult; 3200 3201 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3202 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3203 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3204 } else { 3205 for (i=0;i<n;i++) leaf_data[i] = 1; 3206 } 3207 /* local size of new subset */ 3208 n_n = 0; 3209 for (i=0;i<n;i++) n_n += leaf_data[i]; 3210 3211 /* global indexes in layout */ 3212 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3213 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3214 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3215 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3216 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3217 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3218 3219 /* reduce from leaves to roots */ 3220 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3221 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3222 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3223 3224 /* count indexes in local part of layout */ 3225 nlocals = 0; 3226 first_index = -1; 3227 first_found = PETSC_FALSE; 3228 for (i=0;i<Nl;i++) { 3229 if (!first_found && root_data[i]) { 3230 first_found = PETSC_TRUE; 3231 first_index = i; 3232 } 3233 nlocals += root_data[i]; 3234 } 3235 3236 /* cumulative of number of indexes and size of subset without holes */ 3237 #if defined(PETSC_HAVE_MPI_EXSCAN) 3238 start = 0; 3239 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3240 #else 3241 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3242 start = start-nlocals; 3243 #endif 3244 3245 if (N_n) { /* compute total size of new subset if requested */ 3246 *N_n = start + nlocals; 3247 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3248 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3249 } 3250 3251 /* adapt root data with cumulative */ 3252 if (first_found) { 3253 PetscInt old_index; 3254 3255 root_data[first_index] += start; 3256 old_index = first_index; 3257 for (i=first_index+1;i<Nl;i++) { 3258 if (root_data[i]) { 3259 root_data[i] += root_data[old_index]; 3260 old_index = i; 3261 } 3262 } 3263 } 3264 3265 /* from roots to leaves */ 3266 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3267 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3268 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3269 3270 /* create new IS with global indexes without holes */ 3271 if (subset_mult) { 3272 const PetscInt* idxs_mult; 3273 PetscInt cum; 3274 3275 cum = 0; 3276 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3277 for (i=0;i<n;i++) { 3278 PetscInt j; 3279 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3280 } 3281 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3282 } else { 3283 for (i=0;i<n;i++) { 3284 gidxs[i] = leaf_data[i]-1; 3285 } 3286 } 3287 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3288 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3289 PetscFunctionReturn(0); 3290 } 3291 3292 #undef __FUNCT__ 3293 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3294 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3295 { 3296 PetscInt i,j; 3297 PetscScalar *alphas; 3298 PetscErrorCode ierr; 3299 3300 PetscFunctionBegin; 3301 /* this implements stabilized Gram-Schmidt */ 3302 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3303 for (i=0;i<n;i++) { 3304 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3305 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3306 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3307 } 3308 ierr = PetscFree(alphas);CHKERRQ(ierr); 3309 PetscFunctionReturn(0); 3310 } 3311 3312 #undef __FUNCT__ 3313 #define __FUNCT__ "MatISGetSubassemblingPattern" 3314 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3315 { 3316 IS ranks_send_to; 3317 PetscInt n_neighs,*neighs,*n_shared,**shared; 3318 PetscMPIInt size,rank,color; 3319 PetscInt *xadj,*adjncy; 3320 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3321 PetscInt i,local_size,threshold=0; 3322 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3323 PetscSubcomm subcomm; 3324 PetscErrorCode ierr; 3325 3326 PetscFunctionBegin; 3327 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3328 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3329 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3330 3331 /* Get info on mapping */ 3332 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3333 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3334 3335 /* build local CSR graph of subdomains' connectivity */ 3336 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3337 xadj[0] = 0; 3338 xadj[1] = PetscMax(n_neighs-1,0); 3339 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3340 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3341 3342 if (threshold) { 3343 PetscInt xadj_count = 0; 3344 for (i=1;i<n_neighs;i++) { 3345 if (n_shared[i] > threshold) { 3346 adjncy[xadj_count] = neighs[i]; 3347 adjncy_wgt[xadj_count] = n_shared[i]; 3348 xadj_count++; 3349 } 3350 } 3351 xadj[1] = xadj_count; 3352 } else { 3353 if (xadj[1]) { 3354 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3355 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3356 } 3357 } 3358 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3359 if (use_square) { 3360 for (i=0;i<xadj[1];i++) { 3361 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3362 } 3363 } 3364 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3365 3366 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3367 3368 /* 3369 Restrict work on active processes only. 3370 */ 3371 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3372 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3373 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3374 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3375 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3376 if (color) { 3377 ierr = PetscFree(xadj);CHKERRQ(ierr); 3378 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3379 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3380 } else { 3381 Mat subdomain_adj; 3382 IS new_ranks,new_ranks_contig; 3383 MatPartitioning partitioner; 3384 PetscInt prank,rstart=0,rend=0; 3385 PetscInt *is_indices,*oldranks; 3386 PetscBool aggregate; 3387 3388 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3389 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3390 prank = rank; 3391 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3392 /* 3393 for (i=0;i<size;i++) { 3394 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3395 } 3396 */ 3397 for (i=0;i<xadj[1];i++) { 3398 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3399 } 3400 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3401 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3402 if (aggregate) { 3403 PetscInt lrows,row,ncols,*cols; 3404 PetscMPIInt nrank; 3405 PetscScalar *vals; 3406 3407 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3408 lrows = 0; 3409 if (nrank<redprocs) { 3410 lrows = size/redprocs; 3411 if (nrank<size%redprocs) lrows++; 3412 } 3413 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3414 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3415 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3416 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3417 row = nrank; 3418 ncols = xadj[1]-xadj[0]; 3419 cols = adjncy; 3420 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3421 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3422 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3423 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3424 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3425 ierr = PetscFree(xadj);CHKERRQ(ierr); 3426 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3427 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3428 ierr = PetscFree(vals);CHKERRQ(ierr); 3429 } else { 3430 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3431 } 3432 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3433 3434 /* Partition */ 3435 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3436 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3437 if (use_vwgt) { 3438 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3439 v_wgt[0] = local_size; 3440 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3441 } 3442 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3443 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3444 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3445 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3446 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3447 3448 /* renumber new_ranks to avoid "holes" in new set of processors */ 3449 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3450 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3451 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3452 if (!redprocs) { 3453 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3454 } else { 3455 PetscInt idxs[1]; 3456 PetscMPIInt tag; 3457 MPI_Request *reqs; 3458 3459 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3460 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3461 for (i=rstart;i<rend;i++) { 3462 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3463 } 3464 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3465 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3466 ierr = PetscFree(reqs);CHKERRQ(ierr); 3467 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3468 } 3469 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3470 /* clean up */ 3471 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3472 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3473 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3474 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3475 } 3476 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3477 3478 /* assemble parallel IS for sends */ 3479 i = 1; 3480 if (color) i=0; 3481 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3482 /* get back IS */ 3483 *is_sends = ranks_send_to; 3484 PetscFunctionReturn(0); 3485 } 3486 3487 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3488 3489 #undef __FUNCT__ 3490 #define __FUNCT__ "MatISSubassemble" 3491 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3492 { 3493 Mat local_mat; 3494 IS is_sends_internal; 3495 PetscInt rows,cols,new_local_rows; 3496 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3497 PetscBool ismatis,isdense,newisdense,destroy_mat; 3498 ISLocalToGlobalMapping l2gmap; 3499 PetscInt* l2gmap_indices; 3500 const PetscInt* is_indices; 3501 MatType new_local_type; 3502 /* buffers */ 3503 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3504 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3505 PetscInt *recv_buffer_idxs_local; 3506 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3507 /* MPI */ 3508 MPI_Comm comm,comm_n; 3509 PetscSubcomm subcomm; 3510 PetscMPIInt n_sends,n_recvs,commsize; 3511 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3512 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3513 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3514 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3515 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3516 PetscErrorCode ierr; 3517 3518 PetscFunctionBegin; 3519 /* TODO: add missing checks */ 3520 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3521 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3522 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3523 PetscValidLogicalCollectiveInt(mat,nis,7); 3524 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3525 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3526 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3527 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3528 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3529 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3530 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3531 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3532 PetscInt mrows,mcols,mnrows,mncols; 3533 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3534 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3535 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3536 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3537 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3538 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3539 } 3540 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3541 PetscValidLogicalCollectiveInt(mat,bs,0); 3542 /* prepare IS for sending if not provided */ 3543 if (!is_sends) { 3544 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3545 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3546 } else { 3547 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3548 is_sends_internal = is_sends; 3549 } 3550 3551 /* get comm */ 3552 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3553 3554 /* compute number of sends */ 3555 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3556 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3557 3558 /* compute number of receives */ 3559 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3560 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3561 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3562 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3563 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3564 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3565 ierr = PetscFree(iflags);CHKERRQ(ierr); 3566 3567 /* restrict comm if requested */ 3568 subcomm = 0; 3569 destroy_mat = PETSC_FALSE; 3570 if (restrict_comm) { 3571 PetscMPIInt color,subcommsize; 3572 3573 color = 0; 3574 if (restrict_full) { 3575 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3576 } else { 3577 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3578 } 3579 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3580 subcommsize = commsize - subcommsize; 3581 /* check if reuse has been requested */ 3582 if (reuse == MAT_REUSE_MATRIX) { 3583 if (*mat_n) { 3584 PetscMPIInt subcommsize2; 3585 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3586 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3587 comm_n = PetscObjectComm((PetscObject)*mat_n); 3588 } else { 3589 comm_n = PETSC_COMM_SELF; 3590 } 3591 } else { /* MAT_INITIAL_MATRIX */ 3592 PetscMPIInt rank; 3593 3594 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3595 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3596 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3597 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3598 comm_n = PetscSubcommChild(subcomm); 3599 } 3600 /* flag to destroy *mat_n if not significative */ 3601 if (color) destroy_mat = PETSC_TRUE; 3602 } else { 3603 comm_n = comm; 3604 } 3605 3606 /* prepare send/receive buffers */ 3607 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3608 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3609 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3610 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3611 if (nis) { 3612 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3613 } 3614 3615 /* Get data from local matrices */ 3616 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3617 /* TODO: See below some guidelines on how to prepare the local buffers */ 3618 /* 3619 send_buffer_vals should contain the raw values of the local matrix 3620 send_buffer_idxs should contain: 3621 - MatType_PRIVATE type 3622 - PetscInt size_of_l2gmap 3623 - PetscInt global_row_indices[size_of_l2gmap] 3624 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3625 */ 3626 else { 3627 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3628 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3629 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3630 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3631 send_buffer_idxs[1] = i; 3632 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3633 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3634 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3635 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3636 for (i=0;i<n_sends;i++) { 3637 ilengths_vals[is_indices[i]] = len*len; 3638 ilengths_idxs[is_indices[i]] = len+2; 3639 } 3640 } 3641 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3642 /* additional is (if any) */ 3643 if (nis) { 3644 PetscMPIInt psum; 3645 PetscInt j; 3646 for (j=0,psum=0;j<nis;j++) { 3647 PetscInt plen; 3648 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3649 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3650 psum += len+1; /* indices + lenght */ 3651 } 3652 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3653 for (j=0,psum=0;j<nis;j++) { 3654 PetscInt plen; 3655 const PetscInt *is_array_idxs; 3656 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3657 send_buffer_idxs_is[psum] = plen; 3658 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3659 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3660 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3661 psum += plen+1; /* indices + lenght */ 3662 } 3663 for (i=0;i<n_sends;i++) { 3664 ilengths_idxs_is[is_indices[i]] = psum; 3665 } 3666 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3667 } 3668 3669 buf_size_idxs = 0; 3670 buf_size_vals = 0; 3671 buf_size_idxs_is = 0; 3672 for (i=0;i<n_recvs;i++) { 3673 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3674 buf_size_vals += (PetscInt)olengths_vals[i]; 3675 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3676 } 3677 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3678 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3679 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3680 3681 /* get new tags for clean communications */ 3682 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3683 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3684 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3685 3686 /* allocate for requests */ 3687 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3688 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3689 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3690 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3691 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3692 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3693 3694 /* communications */ 3695 ptr_idxs = recv_buffer_idxs; 3696 ptr_vals = recv_buffer_vals; 3697 ptr_idxs_is = recv_buffer_idxs_is; 3698 for (i=0;i<n_recvs;i++) { 3699 source_dest = onodes[i]; 3700 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3701 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3702 ptr_idxs += olengths_idxs[i]; 3703 ptr_vals += olengths_vals[i]; 3704 if (nis) { 3705 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3706 ptr_idxs_is += olengths_idxs_is[i]; 3707 } 3708 } 3709 for (i=0;i<n_sends;i++) { 3710 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3711 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3712 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3713 if (nis) { 3714 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3715 } 3716 } 3717 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3718 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3719 3720 /* assemble new l2g map */ 3721 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3722 ptr_idxs = recv_buffer_idxs; 3723 new_local_rows = 0; 3724 for (i=0;i<n_recvs;i++) { 3725 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3726 ptr_idxs += olengths_idxs[i]; 3727 } 3728 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3729 ptr_idxs = recv_buffer_idxs; 3730 new_local_rows = 0; 3731 for (i=0;i<n_recvs;i++) { 3732 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3733 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3734 ptr_idxs += olengths_idxs[i]; 3735 } 3736 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3737 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3738 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3739 3740 /* infer new local matrix type from received local matrices type */ 3741 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3742 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3743 if (n_recvs) { 3744 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3745 ptr_idxs = recv_buffer_idxs; 3746 for (i=0;i<n_recvs;i++) { 3747 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3748 new_local_type_private = MATAIJ_PRIVATE; 3749 break; 3750 } 3751 ptr_idxs += olengths_idxs[i]; 3752 } 3753 switch (new_local_type_private) { 3754 case MATDENSE_PRIVATE: 3755 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3756 new_local_type = MATSEQAIJ; 3757 bs = 1; 3758 } else { /* if I receive only 1 dense matrix */ 3759 new_local_type = MATSEQDENSE; 3760 bs = 1; 3761 } 3762 break; 3763 case MATAIJ_PRIVATE: 3764 new_local_type = MATSEQAIJ; 3765 bs = 1; 3766 break; 3767 case MATBAIJ_PRIVATE: 3768 new_local_type = MATSEQBAIJ; 3769 break; 3770 case MATSBAIJ_PRIVATE: 3771 new_local_type = MATSEQSBAIJ; 3772 break; 3773 default: 3774 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3775 break; 3776 } 3777 } else { /* by default, new_local_type is seqdense */ 3778 new_local_type = MATSEQDENSE; 3779 bs = 1; 3780 } 3781 3782 /* create MATIS object if needed */ 3783 if (reuse == MAT_INITIAL_MATRIX) { 3784 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3785 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3786 } else { 3787 /* it also destroys the local matrices */ 3788 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3789 } 3790 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3791 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3792 3793 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3794 3795 /* Global to local map of received indices */ 3796 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3797 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3798 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3799 3800 /* restore attributes -> type of incoming data and its size */ 3801 buf_size_idxs = 0; 3802 for (i=0;i<n_recvs;i++) { 3803 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3804 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3805 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3806 } 3807 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3808 3809 /* set preallocation */ 3810 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3811 if (!newisdense) { 3812 PetscInt *new_local_nnz=0; 3813 3814 ptr_idxs = recv_buffer_idxs_local; 3815 if (n_recvs) { 3816 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3817 } 3818 for (i=0;i<n_recvs;i++) { 3819 PetscInt j; 3820 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3821 for (j=0;j<*(ptr_idxs+1);j++) { 3822 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3823 } 3824 } else { 3825 /* TODO */ 3826 } 3827 ptr_idxs += olengths_idxs[i]; 3828 } 3829 if (new_local_nnz) { 3830 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3831 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3832 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3833 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3834 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3835 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3836 } else { 3837 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3838 } 3839 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3840 } else { 3841 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3842 } 3843 3844 /* set values */ 3845 ptr_vals = recv_buffer_vals; 3846 ptr_idxs = recv_buffer_idxs_local; 3847 for (i=0;i<n_recvs;i++) { 3848 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3849 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3850 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3851 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3852 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3853 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3854 } else { 3855 /* TODO */ 3856 } 3857 ptr_idxs += olengths_idxs[i]; 3858 ptr_vals += olengths_vals[i]; 3859 } 3860 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3861 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3862 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3863 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3864 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3865 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3866 3867 #if 0 3868 if (!restrict_comm) { /* check */ 3869 Vec lvec,rvec; 3870 PetscReal infty_error; 3871 3872 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3873 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3874 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3875 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3876 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3877 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3878 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3879 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3880 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3881 } 3882 #endif 3883 3884 /* assemble new additional is (if any) */ 3885 if (nis) { 3886 PetscInt **temp_idxs,*count_is,j,psum; 3887 3888 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3889 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3890 ptr_idxs = recv_buffer_idxs_is; 3891 psum = 0; 3892 for (i=0;i<n_recvs;i++) { 3893 for (j=0;j<nis;j++) { 3894 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3895 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3896 psum += plen; 3897 ptr_idxs += plen+1; /* shift pointer to received data */ 3898 } 3899 } 3900 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3901 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3902 for (i=1;i<nis;i++) { 3903 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3904 } 3905 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3906 ptr_idxs = recv_buffer_idxs_is; 3907 for (i=0;i<n_recvs;i++) { 3908 for (j=0;j<nis;j++) { 3909 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3910 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3911 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3912 ptr_idxs += plen+1; /* shift pointer to received data */ 3913 } 3914 } 3915 for (i=0;i<nis;i++) { 3916 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3917 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3918 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3919 } 3920 ierr = PetscFree(count_is);CHKERRQ(ierr); 3921 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3922 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3923 } 3924 /* free workspace */ 3925 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3926 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3927 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3928 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3929 if (isdense) { 3930 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3931 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3932 } else { 3933 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3934 } 3935 if (nis) { 3936 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3937 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3938 } 3939 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3940 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3941 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3942 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3943 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3944 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3945 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3946 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3947 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3948 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3949 ierr = PetscFree(onodes);CHKERRQ(ierr); 3950 if (nis) { 3951 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3952 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3953 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3954 } 3955 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3956 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3957 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3958 for (i=0;i<nis;i++) { 3959 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3960 } 3961 *mat_n = NULL; 3962 } 3963 PetscFunctionReturn(0); 3964 } 3965 3966 /* temporary hack into ksp private data structure */ 3967 #include <petsc/private/kspimpl.h> 3968 3969 #undef __FUNCT__ 3970 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3971 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3972 { 3973 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3974 PC_IS *pcis = (PC_IS*)pc->data; 3975 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3976 MatNullSpace CoarseNullSpace=NULL; 3977 ISLocalToGlobalMapping coarse_islg; 3978 IS coarse_is,*isarray; 3979 PetscInt i,im_active=-1,active_procs=-1; 3980 PetscInt nis,nisdofs,nisneu; 3981 PC pc_temp; 3982 PCType coarse_pc_type; 3983 KSPType coarse_ksp_type; 3984 PetscBool multilevel_requested,multilevel_allowed; 3985 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3986 Mat t_coarse_mat_is; 3987 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3988 PetscMPIInt all_procs; 3989 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3990 PetscBool compute_vecs = PETSC_FALSE; 3991 PetscScalar *array; 3992 PetscErrorCode ierr; 3993 3994 PetscFunctionBegin; 3995 /* Assign global numbering to coarse dofs */ 3996 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3997 PetscInt ocoarse_size; 3998 compute_vecs = PETSC_TRUE; 3999 ocoarse_size = pcbddc->coarse_size; 4000 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4001 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4002 /* see if we can avoid some work */ 4003 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4004 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4005 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4006 PC pc; 4007 PetscBool isbddc; 4008 4009 /* temporary workaround since PCBDDC does not have a reset method so far */ 4010 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4011 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4012 if (isbddc) { 4013 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4014 } 4015 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4016 coarse_reuse = PETSC_FALSE; 4017 } else { /* we can safely reuse already computed coarse matrix */ 4018 coarse_reuse = PETSC_TRUE; 4019 } 4020 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4021 coarse_reuse = PETSC_FALSE; 4022 } 4023 /* reset any subassembling information */ 4024 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4025 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4026 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4027 coarse_reuse = PETSC_TRUE; 4028 } 4029 4030 /* count "active" (i.e. with positive local size) and "void" processes */ 4031 im_active = !!(pcis->n); 4032 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4033 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4034 void_procs = all_procs-active_procs; 4035 csin_type_simple = PETSC_TRUE; 4036 redist = PETSC_FALSE; 4037 if (pcbddc->current_level && void_procs) { 4038 csin_ml = PETSC_TRUE; 4039 ncoarse_ml = void_procs; 4040 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4041 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4042 csin_ds = PETSC_TRUE; 4043 ncoarse_ds = pcbddc->redistribute_coarse; 4044 redist = PETSC_TRUE; 4045 } else { 4046 csin_ds = PETSC_TRUE; 4047 ncoarse_ds = active_procs; 4048 redist = PETSC_TRUE; 4049 } 4050 } else { 4051 csin_ml = PETSC_FALSE; 4052 ncoarse_ml = all_procs; 4053 if (void_procs) { 4054 csin_ds = PETSC_TRUE; 4055 ncoarse_ds = void_procs; 4056 csin_type_simple = PETSC_FALSE; 4057 } else { 4058 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4059 csin_ds = PETSC_TRUE; 4060 ncoarse_ds = pcbddc->redistribute_coarse; 4061 redist = PETSC_TRUE; 4062 } else { 4063 csin_ds = PETSC_FALSE; 4064 ncoarse_ds = all_procs; 4065 } 4066 } 4067 } 4068 4069 /* 4070 test if we can go multilevel: three conditions must be satisfied: 4071 - we have not exceeded the number of levels requested 4072 - we can actually subassemble the active processes 4073 - we can find a suitable number of MPI processes where we can place the subassembled problem 4074 */ 4075 multilevel_allowed = PETSC_FALSE; 4076 multilevel_requested = PETSC_FALSE; 4077 if (pcbddc->current_level < pcbddc->max_levels) { 4078 multilevel_requested = PETSC_TRUE; 4079 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4080 multilevel_allowed = PETSC_FALSE; 4081 } else { 4082 multilevel_allowed = PETSC_TRUE; 4083 } 4084 } 4085 /* determine number of process partecipating to coarse solver */ 4086 if (multilevel_allowed) { 4087 ncoarse = ncoarse_ml; 4088 csin = csin_ml; 4089 redist = PETSC_FALSE; 4090 } else { 4091 ncoarse = ncoarse_ds; 4092 csin = csin_ds; 4093 } 4094 4095 /* creates temporary l2gmap and IS for coarse indexes */ 4096 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4097 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4098 4099 /* creates temporary MATIS object for coarse matrix */ 4100 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4101 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4102 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4103 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4104 #if 0 4105 { 4106 PetscViewer viewer; 4107 char filename[256]; 4108 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4109 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4110 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4111 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4112 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 4113 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4114 } 4115 #endif 4116 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4117 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4118 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4119 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4120 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4121 4122 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4123 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4124 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4125 const PetscInt *idxs; 4126 ISLocalToGlobalMapping tmap; 4127 4128 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4129 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4130 /* allocate space for temporary storage */ 4131 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4132 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4133 /* allocate for IS array */ 4134 nisdofs = pcbddc->n_ISForDofsLocal; 4135 nisneu = !!pcbddc->NeumannBoundariesLocal; 4136 nis = nisdofs + nisneu; 4137 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4138 /* dofs splitting */ 4139 for (i=0;i<nisdofs;i++) { 4140 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4141 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4142 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4143 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4144 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4145 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4146 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4147 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4148 } 4149 /* neumann boundaries */ 4150 if (pcbddc->NeumannBoundariesLocal) { 4151 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4152 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4153 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4154 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4155 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4156 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4157 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4158 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4159 } 4160 /* free memory */ 4161 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4162 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4163 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4164 } else { 4165 nis = 0; 4166 nisdofs = 0; 4167 nisneu = 0; 4168 isarray = NULL; 4169 } 4170 /* destroy no longer needed map */ 4171 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4172 4173 /* restrict on coarse candidates (if needed) */ 4174 coarse_mat_is = NULL; 4175 if (csin) { 4176 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4177 if (redist) { 4178 PetscMPIInt rank; 4179 PetscInt spc,n_spc_p1,dest[1],destsize; 4180 4181 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4182 spc = active_procs/ncoarse; 4183 n_spc_p1 = active_procs%ncoarse; 4184 if (im_active) { 4185 destsize = 1; 4186 if (rank > n_spc_p1*(spc+1)-1) { 4187 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4188 } else { 4189 dest[0] = rank/(spc+1); 4190 } 4191 } else { 4192 destsize = 0; 4193 } 4194 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4195 } else if (csin_type_simple) { 4196 PetscMPIInt rank; 4197 PetscInt issize,isidx; 4198 4199 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4200 if (im_active) { 4201 issize = 1; 4202 isidx = (PetscInt)rank; 4203 } else { 4204 issize = 0; 4205 isidx = -1; 4206 } 4207 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4208 } else { /* get a suitable subassembling pattern from MATIS code */ 4209 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4210 } 4211 4212 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4213 if (!redist || ncoarse <= void_procs) { 4214 PetscInt ncoarse_cand,tissize,*nisindices; 4215 PetscInt *coarse_candidates; 4216 const PetscInt* tisindices; 4217 4218 /* get coarse candidates' ranks in pc communicator */ 4219 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4220 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4221 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4222 if (!coarse_candidates[i]) { 4223 coarse_candidates[ncoarse_cand++]=i; 4224 } 4225 } 4226 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4227 4228 4229 if (pcbddc->dbg_flag) { 4230 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4231 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4232 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4233 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4234 for (i=0;i<ncoarse_cand;i++) { 4235 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4236 } 4237 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4238 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4239 } 4240 /* shift the pattern on coarse candidates */ 4241 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4242 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4243 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4244 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4245 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4246 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4247 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4248 } 4249 if (pcbddc->dbg_flag) { 4250 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4251 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4252 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4253 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4254 } 4255 } 4256 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4257 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4258 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4259 } else { /* this is the last level, so use just receiving processes in subcomm */ 4260 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4261 } 4262 } else { 4263 if (pcbddc->dbg_flag) { 4264 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4266 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4267 } 4268 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4269 coarse_mat_is = t_coarse_mat_is; 4270 } 4271 4272 /* create local to global scatters for coarse problem */ 4273 if (compute_vecs) { 4274 PetscInt lrows; 4275 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4276 if (coarse_mat_is) { 4277 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4278 } else { 4279 lrows = 0; 4280 } 4281 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4282 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4283 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4284 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4285 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4286 } 4287 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4288 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4289 4290 /* set defaults for coarse KSP and PC */ 4291 if (multilevel_allowed) { 4292 coarse_ksp_type = KSPRICHARDSON; 4293 coarse_pc_type = PCBDDC; 4294 } else { 4295 coarse_ksp_type = KSPPREONLY; 4296 coarse_pc_type = PCREDUNDANT; 4297 } 4298 4299 /* print some info if requested */ 4300 if (pcbddc->dbg_flag) { 4301 if (!multilevel_allowed) { 4302 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4303 if (multilevel_requested) { 4304 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4305 } else if (pcbddc->max_levels) { 4306 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4307 } 4308 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4309 } 4310 } 4311 4312 /* create the coarse KSP object only once with defaults */ 4313 if (coarse_mat_is) { 4314 MatReuse coarse_mat_reuse; 4315 PetscViewer dbg_viewer = NULL; 4316 if (pcbddc->dbg_flag) { 4317 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4318 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4319 } 4320 if (!pcbddc->coarse_ksp) { 4321 char prefix[256],str_level[16]; 4322 size_t len; 4323 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4324 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4325 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4326 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4327 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4328 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4329 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4330 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4331 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4332 /* prefix */ 4333 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4334 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4335 if (!pcbddc->current_level) { 4336 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4337 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4338 } else { 4339 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4340 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4341 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4342 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4343 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4344 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4345 } 4346 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4347 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4348 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4349 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4350 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4351 /* allow user customization */ 4352 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4353 } 4354 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4355 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4356 if (nisdofs) { 4357 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4358 for (i=0;i<nisdofs;i++) { 4359 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4360 } 4361 } 4362 if (nisneu) { 4363 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4364 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4365 } 4366 4367 /* get some info after set from options */ 4368 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4369 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4370 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4371 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4372 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4373 isbddc = PETSC_FALSE; 4374 } 4375 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4376 if (isredundant) { 4377 KSP inner_ksp; 4378 PC inner_pc; 4379 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4380 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4381 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4382 } 4383 4384 /* assemble coarse matrix */ 4385 if (coarse_reuse) { 4386 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4387 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4388 coarse_mat_reuse = MAT_REUSE_MATRIX; 4389 } else { 4390 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4391 } 4392 if (isbddc || isnn) { 4393 if (pcbddc->coarsening_ratio > 1) { 4394 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4395 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4396 if (pcbddc->dbg_flag) { 4397 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4398 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4399 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4400 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4401 } 4402 } 4403 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4404 } else { 4405 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4406 coarse_mat = coarse_mat_is; 4407 } 4408 } else { 4409 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4410 } 4411 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4412 4413 /* propagate symmetry info of coarse matrix */ 4414 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4415 if (pc->pmat->symmetric_set) { 4416 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4417 } 4418 if (pc->pmat->hermitian_set) { 4419 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4420 } 4421 if (pc->pmat->spd_set) { 4422 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4423 } 4424 /* set operators */ 4425 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4426 if (pcbddc->dbg_flag) { 4427 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4428 } 4429 } else { /* processes non partecipating to coarse solver (if any) */ 4430 coarse_mat = 0; 4431 } 4432 ierr = PetscFree(isarray);CHKERRQ(ierr); 4433 #if 0 4434 { 4435 PetscViewer viewer; 4436 char filename[256]; 4437 sprintf(filename,"coarse_mat.m"); 4438 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4439 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4440 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4441 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 4442 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4443 } 4444 #endif 4445 4446 /* Compute coarse null space (special handling by BDDC only) */ 4447 #if 0 4448 if (pcbddc->NullSpace) { 4449 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4450 } 4451 #endif 4452 4453 if (pcbddc->coarse_ksp) { 4454 Vec crhs,csol; 4455 PetscBool ispreonly; 4456 4457 if (CoarseNullSpace) { 4458 if (isbddc) { 4459 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4460 } else { 4461 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4462 } 4463 } 4464 /* setup coarse ksp */ 4465 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4466 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4467 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4468 /* hack */ 4469 if (!csol) { 4470 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4471 } 4472 if (!crhs) { 4473 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4474 } 4475 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4476 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4477 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4478 KSP check_ksp; 4479 KSPType check_ksp_type; 4480 PC check_pc; 4481 Vec check_vec,coarse_vec; 4482 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4483 PetscInt its; 4484 PetscBool compute_eigs; 4485 PetscReal *eigs_r,*eigs_c; 4486 PetscInt neigs; 4487 const char *prefix; 4488 4489 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4490 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4491 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4492 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4493 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4494 if (ispreonly) { 4495 check_ksp_type = KSPPREONLY; 4496 compute_eigs = PETSC_FALSE; 4497 } else { 4498 check_ksp_type = KSPGMRES; 4499 compute_eigs = PETSC_TRUE; 4500 } 4501 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4502 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4503 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4504 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4505 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4506 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4507 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4508 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4509 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4510 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4511 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4512 /* create random vec */ 4513 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4514 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4515 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4516 if (CoarseNullSpace) { 4517 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4518 } 4519 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4520 /* solve coarse problem */ 4521 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4522 if (CoarseNullSpace) { 4523 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4524 } 4525 /* set eigenvalue estimation if preonly has not been requested */ 4526 if (compute_eigs) { 4527 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4528 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4529 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4530 lambda_max = eigs_r[neigs-1]; 4531 lambda_min = eigs_r[0]; 4532 if (pcbddc->use_coarse_estimates) { 4533 if (lambda_max>lambda_min) { 4534 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4535 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4536 } 4537 } 4538 } 4539 4540 /* check coarse problem residual error */ 4541 if (pcbddc->dbg_flag) { 4542 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4543 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4544 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4545 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4546 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4547 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4548 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4549 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4550 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4551 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4552 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4553 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4554 if (compute_eigs) { 4555 PetscReal lambda_max_s,lambda_min_s; 4556 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4557 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4558 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4559 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4560 for (i=0;i<neigs;i++) { 4561 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4562 } 4563 } 4564 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4565 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4566 } 4567 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4568 if (compute_eigs) { 4569 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4570 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4571 } 4572 } 4573 } 4574 /* print additional info */ 4575 if (pcbddc->dbg_flag) { 4576 /* waits until all processes reaches this point */ 4577 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4578 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4579 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4580 } 4581 4582 /* free memory */ 4583 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4584 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4585 PetscFunctionReturn(0); 4586 } 4587 4588 #undef __FUNCT__ 4589 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4590 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4591 { 4592 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4593 PC_IS* pcis = (PC_IS*)pc->data; 4594 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4595 IS subset,subset_mult,subset_n; 4596 PetscInt local_size,coarse_size=0; 4597 PetscInt *local_primal_indices=NULL; 4598 const PetscInt *t_local_primal_indices; 4599 PetscErrorCode ierr; 4600 4601 PetscFunctionBegin; 4602 /* Compute global number of coarse dofs */ 4603 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4604 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4605 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4606 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4607 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4608 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4609 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4610 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4611 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4612 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 4613 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4614 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4615 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4616 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4617 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4618 4619 /* check numbering */ 4620 if (pcbddc->dbg_flag) { 4621 PetscScalar coarsesum,*array; 4622 PetscInt i; 4623 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4624 4625 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4626 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4627 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4628 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4629 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4630 for (i=0;i<pcbddc->local_primal_size;i++) { 4631 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4632 } 4633 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4634 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4635 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4636 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4637 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4638 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4639 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4640 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4641 for (i=0;i<pcis->n;i++) { 4642 if (array[i] == 1.0) { 4643 set_error = PETSC_TRUE; 4644 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4645 } 4646 } 4647 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4648 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4649 for (i=0;i<pcis->n;i++) { 4650 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4651 } 4652 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4653 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4654 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4655 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4656 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4658 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4659 PetscInt *gidxs; 4660 4661 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4662 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4663 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4664 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4665 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4666 for (i=0;i<pcbddc->local_primal_size;i++) { 4667 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4668 } 4669 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4670 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4671 } 4672 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4673 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4674 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4675 } 4676 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4677 /* get back data */ 4678 *coarse_size_n = coarse_size; 4679 *local_primal_indices_n = local_primal_indices; 4680 PetscFunctionReturn(0); 4681 } 4682 4683 #undef __FUNCT__ 4684 #define __FUNCT__ "PCBDDCGlobalToLocal" 4685 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4686 { 4687 IS localis_t; 4688 PetscInt i,lsize,*idxs,n; 4689 PetscScalar *vals; 4690 PetscErrorCode ierr; 4691 4692 PetscFunctionBegin; 4693 /* get indices in local ordering exploiting local to global map */ 4694 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4695 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4696 for (i=0;i<lsize;i++) vals[i] = 1.0; 4697 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4698 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4699 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4700 if (idxs) { /* multilevel guard */ 4701 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4702 } 4703 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4704 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4705 ierr = PetscFree(vals);CHKERRQ(ierr); 4706 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4707 /* now compute set in local ordering */ 4708 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4709 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4710 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4711 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4712 for (i=0,lsize=0;i<n;i++) { 4713 if (PetscRealPart(vals[i]) > 0.5) { 4714 lsize++; 4715 } 4716 } 4717 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4718 for (i=0,lsize=0;i<n;i++) { 4719 if (PetscRealPart(vals[i]) > 0.5) { 4720 idxs[lsize++] = i; 4721 } 4722 } 4723 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4724 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4725 *localis = localis_t; 4726 PetscFunctionReturn(0); 4727 } 4728 4729 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4730 #undef __FUNCT__ 4731 #define __FUNCT__ "PCBDDCMatMult_Private" 4732 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4733 { 4734 PCBDDCChange_ctx change_ctx; 4735 PetscErrorCode ierr; 4736 4737 PetscFunctionBegin; 4738 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4739 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4740 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4741 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4742 PetscFunctionReturn(0); 4743 } 4744 4745 #undef __FUNCT__ 4746 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4747 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4748 { 4749 PCBDDCChange_ctx change_ctx; 4750 PetscErrorCode ierr; 4751 4752 PetscFunctionBegin; 4753 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4754 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4755 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4756 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4757 PetscFunctionReturn(0); 4758 } 4759 4760 #undef __FUNCT__ 4761 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4762 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4763 { 4764 PC_IS *pcis=(PC_IS*)pc->data; 4765 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4766 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4767 Mat S_j; 4768 PetscInt *used_xadj,*used_adjncy; 4769 PetscBool free_used_adj; 4770 PetscErrorCode ierr; 4771 4772 PetscFunctionBegin; 4773 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4774 free_used_adj = PETSC_FALSE; 4775 if (pcbddc->sub_schurs_layers == -1) { 4776 used_xadj = NULL; 4777 used_adjncy = NULL; 4778 } else { 4779 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4780 used_xadj = pcbddc->mat_graph->xadj; 4781 used_adjncy = pcbddc->mat_graph->adjncy; 4782 } else if (pcbddc->computed_rowadj) { 4783 used_xadj = pcbddc->mat_graph->xadj; 4784 used_adjncy = pcbddc->mat_graph->adjncy; 4785 } else { 4786 PetscBool flg_row=PETSC_FALSE; 4787 const PetscInt *xadj,*adjncy; 4788 PetscInt nvtxs; 4789 4790 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4791 if (flg_row) { 4792 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4793 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4794 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4795 free_used_adj = PETSC_TRUE; 4796 } else { 4797 pcbddc->sub_schurs_layers = -1; 4798 used_xadj = NULL; 4799 used_adjncy = NULL; 4800 } 4801 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4802 } 4803 } 4804 4805 /* setup sub_schurs data */ 4806 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4807 if (!sub_schurs->use_mumps) { 4808 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4809 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4810 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4811 } else { 4812 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4813 PetscBool isseqaij; 4814 if (!pcbddc->use_vertices && reuse_solvers) { 4815 PetscInt n_vertices; 4816 4817 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4818 reuse_solvers = (PetscBool)!n_vertices; 4819 } 4820 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4821 if (!isseqaij) { 4822 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4823 if (matis->A == pcbddc->local_mat) { 4824 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4825 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4826 } else { 4827 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4828 } 4829 } 4830 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4831 } 4832 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4833 4834 /* free adjacency */ 4835 if (free_used_adj) { 4836 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4837 } 4838 PetscFunctionReturn(0); 4839 } 4840 4841 #undef __FUNCT__ 4842 #define __FUNCT__ "PCBDDCInitSubSchurs" 4843 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4844 { 4845 PC_IS *pcis=(PC_IS*)pc->data; 4846 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4847 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4848 PCBDDCGraph graph; 4849 PetscErrorCode ierr; 4850 4851 PetscFunctionBegin; 4852 /* attach interface graph for determining subsets */ 4853 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4854 IS verticesIS,verticescomm; 4855 PetscInt vsize,*idxs; 4856 4857 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4858 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4859 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4860 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4861 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4862 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4863 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4864 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4865 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4866 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4867 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4868 /* 4869 if (pcbddc->dbg_flag) { 4870 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4871 } 4872 */ 4873 } else { 4874 graph = pcbddc->mat_graph; 4875 } 4876 4877 /* sub_schurs init */ 4878 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4879 4880 /* free graph struct */ 4881 if (pcbddc->sub_schurs_rebuild) { 4882 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4883 } 4884 PetscFunctionReturn(0); 4885 } 4886