1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 29 30 if (pcbddc->dbg_flag) { 31 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 32 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 33 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 35 } 36 37 if (pcbddc->dbg_flag) { 38 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 39 } 40 41 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 42 43 /* max size of subsets */ 44 mss = 0; 45 for (i=0;i<sub_schurs->n_subs;i++) { 46 PetscInt subset_size; 47 48 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 49 mss = PetscMax(mss,subset_size); 50 } 51 52 /* min/max and threshold */ 53 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 54 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 55 nmax = PetscMax(nmin,nmax); 56 allocated_S_St = PETSC_FALSE; 57 if (nmin) { 58 allocated_S_St = PETSC_TRUE; 59 } 60 61 /* allocate lapack workspace */ 62 cum = cum2 = 0; 63 maxneigs = 0; 64 for (i=0;i<sub_schurs->n_subs;i++) { 65 PetscInt n,subset_size; 66 67 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 68 n = PetscMin(subset_size,nmax); 69 cum += subset_size; 70 cum2 += subset_size*n; 71 maxneigs = PetscMax(maxneigs,n); 72 } 73 if (mss) { 74 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 75 PetscBLASInt B_itype = 1; 76 PetscBLASInt B_N = mss; 77 PetscReal zero = 0.0; 78 PetscReal eps = 0.0; /* dlamch? */ 79 80 B_lwork = -1; 81 S = NULL; 82 St = NULL; 83 eigs = NULL; 84 eigv = NULL; 85 B_iwork = NULL; 86 B_ifail = NULL; 87 #if defined(PETSC_USE_COMPLEX) 88 rwork = NULL; 89 #endif 90 thresh = 1.0; 91 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 92 #if defined(PETSC_USE_COMPLEX) 93 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 94 #else 95 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 96 #endif 97 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 98 ierr = PetscFPTrapPop();CHKERRQ(ierr); 99 } else { 100 /* TODO */ 101 } 102 } else { 103 lwork = 0; 104 } 105 106 nv = 0; 107 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 108 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 109 } 110 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 111 if (allocated_S_St) { 112 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 113 } 114 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 115 #if defined(PETSC_USE_COMPLEX) 116 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 117 #endif 118 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 119 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 121 nv+cum,&pcbddc->adaptive_constraints_idxs, 122 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 123 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 124 125 maxneigs = 0; 126 cum = cumarray = 0; 127 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 128 pcbddc->adaptive_constraints_data_ptr[0] = 0; 129 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 130 const PetscInt *idxs; 131 132 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 133 for (cum=0;cum<nv;cum++) { 134 pcbddc->adaptive_constraints_n[cum] = 1; 135 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 136 pcbddc->adaptive_constraints_data[cum] = 1.0; 137 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 138 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 139 } 140 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 141 } 142 143 if (mss) { /* multilevel */ 144 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 145 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 146 } 147 148 for (i=0;i<sub_schurs->n_subs;i++) { 149 150 const PetscInt *idxs; 151 PetscReal infty = PETSC_MAX_REAL; 152 PetscInt j,subset_size,eigs_start = 0; 153 PetscBLASInt B_N; 154 PetscBool same_data = PETSC_FALSE; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 158 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 159 if (sub_schurs->is_hermitian) { 160 PetscInt j,k; 161 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 162 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 163 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 } 165 for (j=0;j<subset_size;j++) { 166 for (k=j;k<subset_size;k++) { 167 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 168 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 169 } 170 } 171 } else { 172 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 173 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 174 } 175 } else { 176 S = Sarray + cumarray; 177 St = Starray + cumarray; 178 } 179 180 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 181 /* see if we can save some work */ 182 if (sub_schurs->n_subs == 1) { 183 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 184 } 185 186 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 187 B_neigs = 0; 188 } else { 189 /* Threshold: this is an heuristic for edges */ 190 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 191 192 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 193 PetscBLASInt B_itype = 1; 194 PetscBLASInt B_IL, B_IU; 195 PetscReal eps = -1.0; /* dlamch? */ 196 PetscInt nmin_s; 197 198 /* ask for eigenvalues larger than thresh */ 199 if (pcbddc->dbg_flag) { 200 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 201 } 202 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 203 #if defined(PETSC_USE_COMPLEX) 204 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 205 #else 206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 207 #endif 208 ierr = PetscFPTrapPop();CHKERRQ(ierr); 209 if (B_ierr) { 210 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 211 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 212 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 213 } 214 215 if (B_neigs > nmax) { 216 if (pcbddc->dbg_flag) { 217 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 218 } 219 eigs_start = B_neigs -nmax; 220 B_neigs = nmax; 221 } 222 223 nmin_s = PetscMin(nmin,B_N); 224 if (B_neigs < nmin_s) { 225 PetscBLASInt B_neigs2; 226 227 B_IU = B_N - B_neigs; 228 B_IL = B_N - nmin_s + 1; 229 if (pcbddc->dbg_flag) { 230 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 231 } 232 if (sub_schurs->is_hermitian) { 233 PetscInt j; 234 for (j=0;j<subset_size;j++) { 235 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 236 } 237 for (j=0;j<subset_size;j++) { 238 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 239 } 240 } else { 241 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 242 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 } 244 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 245 #if defined(PETSC_USE_COMPLEX) 246 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 247 #else 248 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 249 #endif 250 ierr = PetscFPTrapPop();CHKERRQ(ierr); 251 B_neigs += B_neigs2; 252 } 253 if (B_ierr) { 254 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 255 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 256 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 257 } 258 if (pcbddc->dbg_flag) { 259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 260 for (j=0;j<B_neigs;j++) { 261 if (eigs[j] == 0.0) { 262 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 263 } else { 264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 265 } 266 } 267 } 268 } else { 269 /* TODO */ 270 } 271 } 272 maxneigs = PetscMax(B_neigs,maxneigs); 273 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 274 if (B_neigs) { 275 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 276 277 if (pcbddc->dbg_flag > 1) { 278 PetscInt ii; 279 for (ii=0;ii<B_neigs;ii++) { 280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 281 for (j=0;j<B_N;j++) { 282 #if defined(PETSC_USE_COMPLEX) 283 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 284 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 286 #else 287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 288 #endif 289 } 290 } 291 } 292 #if 0 293 for (j=0;j<B_neigs;j++) { 294 PetscBLASInt Blas_N,Blas_one = 1.0; 295 PetscScalar norm; 296 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 297 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 298 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 299 if (pcbddc->adaptive_constraints_data[cum] > 0.0) { 300 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 301 } else { 302 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 303 } 304 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 305 } 306 #endif 307 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 308 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 309 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 310 cum++; 311 } 312 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 313 /* shift for next computation */ 314 cumarray += subset_size*subset_size; 315 } 316 if (pcbddc->dbg_flag) { 317 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 318 } 319 320 if (mss) { 321 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 322 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 323 /* destroy matrices (junk) */ 324 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 325 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 326 } 327 if (allocated_S_St) { 328 ierr = PetscFree2(S,St);CHKERRQ(ierr); 329 } 330 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 331 #if defined(PETSC_USE_COMPLEX) 332 ierr = PetscFree(rwork);CHKERRQ(ierr); 333 #endif 334 if (pcbddc->dbg_flag) { 335 PetscInt maxneigs_r; 336 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 337 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 338 } 339 PetscFunctionReturn(0); 340 } 341 342 #undef __FUNCT__ 343 #define __FUNCT__ "PCBDDCSetUpSolvers" 344 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 345 { 346 PetscScalar *coarse_submat_vals; 347 PetscErrorCode ierr; 348 349 PetscFunctionBegin; 350 /* Setup local scatters R_to_B and (optionally) R_to_D */ 351 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 352 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 353 354 /* Setup local neumann solver ksp_R */ 355 /* PCBDDCSetUpLocalScatters should be called first! */ 356 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 357 358 /* 359 Setup local correction and local part of coarse basis. 360 Gives back the dense local part of the coarse matrix in column major ordering 361 */ 362 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 363 364 /* Compute total number of coarse nodes and setup coarse solver */ 365 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 366 367 /* free */ 368 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 372 #undef __FUNCT__ 373 #define __FUNCT__ "PCBDDCResetCustomization" 374 PetscErrorCode PCBDDCResetCustomization(PC pc) 375 { 376 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 377 PetscErrorCode ierr; 378 379 PetscFunctionBegin; 380 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 381 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 382 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 383 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 385 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 386 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 388 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 389 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 #undef __FUNCT__ 394 #define __FUNCT__ "PCBDDCResetTopography" 395 PetscErrorCode PCBDDCResetTopography(PC pc) 396 { 397 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 398 PetscErrorCode ierr; 399 400 PetscFunctionBegin; 401 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 403 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 404 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 405 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "PCBDDCResetSolvers" 411 PetscErrorCode PCBDDCResetSolvers(PC pc) 412 { 413 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 414 PetscScalar *array; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 419 if (pcbddc->coarse_phi_B) { 420 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 421 ierr = PetscFree(array);CHKERRQ(ierr); 422 } 423 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 424 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 425 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 426 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 427 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 428 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 430 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 432 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 433 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 434 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 435 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 436 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 437 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 438 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 439 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 440 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 441 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 442 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 443 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 444 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 445 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 446 PetscFunctionReturn(0); 447 } 448 449 #undef __FUNCT__ 450 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 451 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 452 { 453 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 454 PC_IS *pcis = (PC_IS*)pc->data; 455 VecType impVecType; 456 PetscInt n_constraints,n_R,old_size; 457 PetscErrorCode ierr; 458 459 PetscFunctionBegin; 460 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 461 /* get sizes */ 462 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 463 n_R = pcis->n-pcbddc->n_vertices; 464 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 465 /* local work vectors (try to avoid unneeded work)*/ 466 /* R nodes */ 467 old_size = -1; 468 if (pcbddc->vec1_R) { 469 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 470 } 471 if (n_R != old_size) { 472 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 473 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 474 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 475 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 476 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 477 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 478 } 479 /* local primal dofs */ 480 old_size = -1; 481 if (pcbddc->vec1_P) { 482 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 483 } 484 if (pcbddc->local_primal_size != old_size) { 485 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 486 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 487 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 488 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 489 } 490 /* local explicit constraints */ 491 old_size = -1; 492 if (pcbddc->vec1_C) { 493 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 494 } 495 if (n_constraints && n_constraints != old_size) { 496 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 497 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 498 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 499 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 500 } 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "PCBDDCSetUpCorrection" 506 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 507 { 508 PetscErrorCode ierr; 509 /* pointers to pcis and pcbddc */ 510 PC_IS* pcis = (PC_IS*)pc->data; 511 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 512 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 513 /* submatrices of local problem */ 514 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 515 /* submatrices of local coarse problem */ 516 Mat S_VV,S_CV,S_VC,S_CC; 517 /* working matrices */ 518 Mat C_CR; 519 /* additional working stuff */ 520 PC pc_R; 521 Mat F; 522 PetscBool isLU,isCHOL,isILU; 523 524 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 525 PetscScalar *work; 526 PetscInt *idx_V_B; 527 PetscInt n,n_vertices,n_constraints; 528 PetscInt i,n_R,n_D,n_B; 529 PetscBool unsymmetric_check; 530 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 531 MatType impMatType; 532 /* some shortcuts to scalars */ 533 PetscScalar one=1.0,m_one=-1.0; 534 535 PetscFunctionBegin; 536 n_vertices = pcbddc->n_vertices; 537 n_constraints = pcbddc->local_primal_size-n_vertices; 538 /* Set Non-overlapping dimensions */ 539 n_B = pcis->n_B; 540 n_D = pcis->n - n_B; 541 n_R = pcis->n - n_vertices; 542 543 /* Set types for local objects needed by BDDC precondtioner */ 544 impMatType = MATSEQDENSE; 545 546 /* vertices in boundary numbering */ 547 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 548 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 549 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 550 551 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 552 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 553 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 554 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 555 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 556 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 557 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 558 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 559 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 560 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 561 562 unsymmetric_check = PETSC_FALSE; 563 /* allocate workspace */ 564 n = 0; 565 if (n_constraints) { 566 n += n_R*n_constraints; 567 } 568 if (n_vertices) { 569 n = PetscMax(2*n_R*n_vertices,n); 570 n = PetscMax((n_R+n_B)*n_vertices,n); 571 } 572 if (!pcbddc->symmetric_primal) { 573 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 574 unsymmetric_check = PETSC_TRUE; 575 } 576 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 577 578 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 579 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 580 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 581 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 582 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 583 if (isLU || isILU || isCHOL) { 584 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 585 } else if (sub_schurs->reuse_mumps) { 586 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 587 MatFactorType type; 588 589 F = reuse_mumps->F; 590 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 591 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 592 } else { 593 F = NULL; 594 } 595 596 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 597 if (n_constraints) { 598 Mat M1,M2,M3; 599 Mat auxmat; 600 IS is_aux; 601 PetscScalar *array,*array2; 602 603 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 604 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 605 606 /* Extract constraints on R nodes: C_{CR} */ 607 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 608 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 609 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 610 611 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 612 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 613 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 614 for (i=0;i<n_constraints;i++) { 615 const PetscScalar *row_cmat_values; 616 const PetscInt *row_cmat_indices; 617 PetscInt size_of_constraint,j; 618 619 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 620 for (j=0;j<size_of_constraint;j++) { 621 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 622 } 623 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 624 } 625 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 626 if (F) { 627 Mat B; 628 629 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 630 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 631 ierr = MatDestroy(&B);CHKERRQ(ierr); 632 } else { 633 PetscScalar *marr; 634 635 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 636 for (i=0;i<n_constraints;i++) { 637 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 638 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 639 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 640 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 641 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 642 } 643 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 644 } 645 if (!pcbddc->switch_static) { 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 647 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 648 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 649 for (i=0;i<n_constraints;i++) { 650 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 651 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 652 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 653 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 654 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 655 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 656 } 657 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 658 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 659 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 660 } else { 661 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 662 pcbddc->local_auxmat2 = local_auxmat2_R; 663 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 664 } 665 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 666 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 667 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 668 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 669 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 670 if (isCHOL) { 671 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 672 } else { 673 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 674 } 675 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 676 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 677 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 678 ierr = MatDestroy(&M2);CHKERRQ(ierr); 679 ierr = MatDestroy(&M3);CHKERRQ(ierr); 680 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 681 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 682 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 683 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 684 ierr = MatDestroy(&M1);CHKERRQ(ierr); 685 } 686 /* Get submatrices from subdomain matrix */ 687 if (n_vertices) { 688 IS is_aux; 689 690 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 691 IS tis; 692 693 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 694 ierr = ISSort(tis);CHKERRQ(ierr); 695 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 696 ierr = ISDestroy(&tis);CHKERRQ(ierr); 697 } else { 698 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 699 } 700 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 701 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 702 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 703 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 704 } 705 706 /* Matrix of coarse basis functions (local) */ 707 if (pcbddc->coarse_phi_B) { 708 PetscInt on_B,on_primal,on_D=n_D; 709 if (pcbddc->coarse_phi_D) { 710 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 711 } 712 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 713 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 714 PetscScalar *marray; 715 716 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 717 ierr = PetscFree(marray);CHKERRQ(ierr); 718 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 719 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 720 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 721 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 722 } 723 } 724 725 if (!pcbddc->coarse_phi_B) { 726 PetscScalar *marray; 727 728 n = n_B*pcbddc->local_primal_size; 729 if (pcbddc->switch_static || pcbddc->dbg_flag) { 730 n += n_D*pcbddc->local_primal_size; 731 } 732 if (!pcbddc->symmetric_primal) { 733 n *= 2; 734 } 735 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 736 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 737 n = n_B*pcbddc->local_primal_size; 738 if (pcbddc->switch_static || pcbddc->dbg_flag) { 739 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 740 n += n_D*pcbddc->local_primal_size; 741 } 742 if (!pcbddc->symmetric_primal) { 743 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 744 if (pcbddc->switch_static || pcbddc->dbg_flag) { 745 n = n_B*pcbddc->local_primal_size; 746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 747 } 748 } else { 749 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 750 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 751 if (pcbddc->switch_static || pcbddc->dbg_flag) { 752 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 753 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 754 } 755 } 756 } 757 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 758 /* vertices */ 759 if (n_vertices) { 760 761 ierr = MatConvert(A_VV,impMatType,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 762 763 if (n_R) { 764 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 765 PetscBLASInt B_N,B_one = 1; 766 PetscScalar *x,*y; 767 PetscBool isseqaij; 768 769 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 770 ierr = MatConvert(A_RV,impMatType,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 771 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 772 if (F) { /* TODO could be optimized for symmetric problems */ 773 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 774 } else { 775 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 776 for (i=0;i<n_vertices;i++) { 777 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 778 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 779 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 780 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 781 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 782 } 783 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 784 } 785 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 786 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 787 if (n_constraints) { 788 Mat B; 789 790 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 791 for (i=0;i<n_vertices;i++) { 792 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 793 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 794 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 795 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 796 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 797 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 798 } 799 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 800 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 801 ierr = MatDestroy(&B);CHKERRQ(ierr); 802 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 803 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 804 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 805 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 806 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 807 ierr = MatDestroy(&B);CHKERRQ(ierr); 808 } 809 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 810 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 811 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 812 } 813 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 814 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 815 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 816 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 817 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 818 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 819 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 820 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 821 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 822 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 823 } else { 824 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 825 } 826 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 827 /* coarse basis functions */ 828 for (i=0;i<n_vertices;i++) { 829 PetscScalar *y; 830 831 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 832 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 833 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836 y[n_B*i+idx_V_B[i]] = 1.0; 837 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 838 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 839 840 if (pcbddc->switch_static || pcbddc->dbg_flag) { 841 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 842 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 843 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 845 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 846 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 847 } 848 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 849 } 850 /* if n_R == 0 the object is not destroyed */ 851 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 852 } 853 854 if (n_constraints) { 855 Mat B; 856 857 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 858 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 859 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 860 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 861 if (n_vertices) { 862 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 863 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 864 } else { 865 Mat S_VCt; 866 867 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 868 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 869 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 870 } 871 } 872 ierr = MatDestroy(&B);CHKERRQ(ierr); 873 /* coarse basis functions */ 874 for (i=0;i<n_constraints;i++) { 875 PetscScalar *y; 876 877 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 878 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 879 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 880 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 883 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 884 if (pcbddc->switch_static || pcbddc->dbg_flag) { 885 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 886 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 887 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 888 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 890 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 891 } 892 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 893 } 894 } 895 if (n_constraints) { 896 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 897 } 898 899 /* compute other basis functions for non-symmetric problems */ 900 if (!pcbddc->symmetric_primal) { 901 902 if (n_constraints) { 903 Mat S_CCT,B_C; 904 905 /* this is a lazy thing */ 906 ierr = MatConvert(C_CR,impMatType,MAT_INPLACE_MATRIX,&C_CR);CHKERRQ(ierr); 907 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 908 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 909 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 910 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 911 if (n_vertices) { 912 Mat B_V,S_VCT; 913 914 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 915 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 916 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 917 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 918 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 919 } 920 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 921 } else { /* if there are no constraints, reset work */ 922 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 923 } 924 if (n_vertices && n_R) { 925 Mat A_VRT; 926 PetscScalar *marray; 927 PetscBLASInt B_N,B_one = 1; 928 929 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 930 ierr = MatConvert(A_VRT,impMatType,MAT_INPLACE_MATRIX,&A_VRT);CHKERRQ(ierr); 931 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 932 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 933 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 934 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 935 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 936 } 937 938 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 939 for (i=0;i<pcbddc->local_primal_size;i++) { 940 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 941 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 942 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 943 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 944 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 945 } 946 } else { 947 for (i=0;i<pcbddc->local_primal_size;i++) { 948 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 949 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 950 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 951 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 952 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 953 } 954 } 955 /* coarse basis functions */ 956 for (i=0;i<pcbddc->local_primal_size;i++) { 957 PetscScalar *y; 958 959 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 960 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 961 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 962 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 964 if (i<n_vertices) { 965 y[n_B*i+idx_V_B[i]] = 1.0; 966 } 967 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 968 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 969 970 if (pcbddc->switch_static || pcbddc->dbg_flag) { 971 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 972 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 973 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 974 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 976 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 977 } 978 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 979 } 980 } 981 /* free memory */ 982 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 983 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 984 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 985 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 986 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 987 ierr = PetscFree(work);CHKERRQ(ierr); 988 if (n_vertices) { 989 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 990 } 991 if (n_constraints) { 992 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 993 } 994 /* Checking coarse_sub_mat and coarse basis functios */ 995 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 996 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 997 if (pcbddc->dbg_flag) { 998 Mat coarse_sub_mat; 999 Mat AUXMAT,TM1,TM2,TM3,TM4; 1000 Mat coarse_phi_D,coarse_phi_B; 1001 Mat coarse_psi_D,coarse_psi_B; 1002 Mat A_II,A_BB,A_IB,A_BI; 1003 Mat C_B,CPHI; 1004 IS is_dummy; 1005 Vec mones; 1006 MatType checkmattype=MATSEQAIJ; 1007 PetscReal real_value; 1008 1009 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1010 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1011 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1012 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1013 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1014 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1015 if (unsymmetric_check) { 1016 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1017 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1018 } 1019 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1020 1021 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1022 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1023 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1024 if (unsymmetric_check) { 1025 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1026 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1027 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1028 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1029 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1030 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1031 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1032 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1033 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1034 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1035 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1036 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1037 } else { 1038 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1039 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1040 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1041 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1042 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1043 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1044 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1045 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1046 } 1047 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1048 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1049 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1050 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 1051 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1052 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1053 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1054 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1055 1056 /* check constraints */ 1057 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1058 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1059 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1060 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1061 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1062 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1063 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1064 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1065 if (unsymmetric_check) { 1066 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1067 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1068 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1069 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1070 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1071 } 1072 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1073 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1074 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1075 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1076 1077 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1078 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1079 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1080 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1081 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1082 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1083 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1084 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1085 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1086 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1087 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1088 if (unsymmetric_check) { 1089 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1090 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1091 } 1092 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1093 } 1094 /* get back data */ 1095 *coarse_submat_vals_n = coarse_submat_vals; 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1101 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1102 { 1103 Mat *work_mat; 1104 IS isrow_s,iscol_s; 1105 PetscBool rsorted,csorted; 1106 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1111 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1112 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1113 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1114 1115 if (!rsorted) { 1116 const PetscInt *idxs; 1117 PetscInt *idxs_sorted,i; 1118 1119 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1120 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1121 for (i=0;i<rsize;i++) { 1122 idxs_perm_r[i] = i; 1123 } 1124 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1125 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1126 for (i=0;i<rsize;i++) { 1127 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1128 } 1129 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1130 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1131 } else { 1132 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1133 isrow_s = isrow; 1134 } 1135 1136 if (!csorted) { 1137 if (isrow == iscol) { 1138 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1139 iscol_s = isrow_s; 1140 } else { 1141 const PetscInt *idxs; 1142 PetscInt *idxs_sorted,i; 1143 1144 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1145 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1146 for (i=0;i<csize;i++) { 1147 idxs_perm_c[i] = i; 1148 } 1149 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1150 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1151 for (i=0;i<csize;i++) { 1152 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1153 } 1154 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1155 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1156 } 1157 } else { 1158 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1159 iscol_s = iscol; 1160 } 1161 1162 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1163 1164 if (!rsorted || !csorted) { 1165 Mat new_mat; 1166 IS is_perm_r,is_perm_c; 1167 1168 if (!rsorted) { 1169 PetscInt *idxs_r,i; 1170 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1171 for (i=0;i<rsize;i++) { 1172 idxs_r[idxs_perm_r[i]] = i; 1173 } 1174 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1175 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1176 } else { 1177 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1178 } 1179 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1180 1181 if (!csorted) { 1182 if (isrow_s == iscol_s) { 1183 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1184 is_perm_c = is_perm_r; 1185 } else { 1186 PetscInt *idxs_c,i; 1187 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1188 for (i=0;i<csize;i++) { 1189 idxs_c[idxs_perm_c[i]] = i; 1190 } 1191 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1192 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1193 } 1194 } else { 1195 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1196 } 1197 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1198 1199 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1200 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1201 work_mat[0] = new_mat; 1202 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1203 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1204 } 1205 1206 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1207 *B = work_mat[0]; 1208 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1209 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1210 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1211 PetscFunctionReturn(0); 1212 } 1213 1214 #undef __FUNCT__ 1215 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1216 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1217 { 1218 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1219 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1220 Mat new_mat; 1221 IS is_local,is_global; 1222 PetscInt local_size; 1223 PetscBool isseqaij; 1224 PetscErrorCode ierr; 1225 1226 PetscFunctionBegin; 1227 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1228 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1229 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1230 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1231 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1232 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1233 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1234 1235 /* check */ 1236 if (pcbddc->dbg_flag) { 1237 Vec x,x_change; 1238 PetscReal error; 1239 1240 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1241 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1242 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1243 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1246 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1247 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1248 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1249 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1250 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1251 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1252 ierr = VecDestroy(&x);CHKERRQ(ierr); 1253 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1254 } 1255 1256 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1257 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1258 if (isseqaij) { 1259 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1260 } else { 1261 Mat work_mat; 1262 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1263 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1264 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1265 } 1266 if (matis->A->symmetric_set) { 1267 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1268 #if !defined(PETSC_USE_COMPLEX) 1269 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1270 #endif 1271 } 1272 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1278 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1279 { 1280 PC_IS* pcis = (PC_IS*)(pc->data); 1281 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1282 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1283 PetscInt *idx_R_local=NULL; 1284 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1285 PetscInt vbs,bs; 1286 PetscBT bitmask=NULL; 1287 PetscErrorCode ierr; 1288 1289 PetscFunctionBegin; 1290 /* 1291 No need to setup local scatters if 1292 - primal space is unchanged 1293 AND 1294 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1295 AND 1296 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1297 */ 1298 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1299 PetscFunctionReturn(0); 1300 } 1301 /* destroy old objects */ 1302 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1303 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1304 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1305 /* Set Non-overlapping dimensions */ 1306 n_B = pcis->n_B; 1307 n_D = pcis->n - n_B; 1308 n_vertices = pcbddc->n_vertices; 1309 1310 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1311 1312 /* create auxiliary bitmask and allocate workspace */ 1313 if (!sub_schurs->reuse_mumps) { 1314 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1315 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1316 for (i=0;i<n_vertices;i++) { 1317 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1318 } 1319 1320 for (i=0, n_R=0; i<pcis->n; i++) { 1321 if (!PetscBTLookup(bitmask,i)) { 1322 idx_R_local[n_R++] = i; 1323 } 1324 } 1325 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1326 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1327 1328 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1329 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1330 } 1331 1332 /* Block code */ 1333 vbs = 1; 1334 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1335 if (bs>1 && !(n_vertices%bs)) { 1336 PetscBool is_blocked = PETSC_TRUE; 1337 PetscInt *vary; 1338 if (!sub_schurs->reuse_mumps) { 1339 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1340 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1341 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1342 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1343 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1344 for (i=0; i<pcis->n/bs; i++) { 1345 if (vary[i]!=0 && vary[i]!=bs) { 1346 is_blocked = PETSC_FALSE; 1347 break; 1348 } 1349 } 1350 ierr = PetscFree(vary);CHKERRQ(ierr); 1351 } else { 1352 /* Verify directly the R set */ 1353 for (i=0; i<n_R/bs; i++) { 1354 PetscInt j,node=idx_R_local[bs*i]; 1355 for (j=1; j<bs; j++) { 1356 if (node != idx_R_local[bs*i+j]-j) { 1357 is_blocked = PETSC_FALSE; 1358 break; 1359 } 1360 } 1361 } 1362 } 1363 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1364 vbs = bs; 1365 for (i=0;i<n_R/vbs;i++) { 1366 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1367 } 1368 } 1369 } 1370 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1371 if (sub_schurs->reuse_mumps) { 1372 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1373 1374 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1375 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1376 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1377 reuse_mumps->is_R = pcbddc->is_R_local; 1378 } else { 1379 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1380 } 1381 1382 /* print some info if requested */ 1383 if (pcbddc->dbg_flag) { 1384 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1385 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1386 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1387 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1388 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1389 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1390 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1391 } 1392 1393 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1394 if (!sub_schurs->reuse_mumps) { 1395 IS is_aux1,is_aux2; 1396 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1397 1398 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1399 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1400 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1401 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1402 for (i=0; i<n_D; i++) { 1403 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1404 } 1405 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1406 for (i=0, j=0; i<n_R; i++) { 1407 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1408 aux_array1[j++] = i; 1409 } 1410 } 1411 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1412 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1413 for (i=0, j=0; i<n_B; i++) { 1414 if (!PetscBTLookup(bitmask,is_indices[i])) { 1415 aux_array2[j++] = i; 1416 } 1417 } 1418 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1419 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1420 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1421 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1422 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1423 1424 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1425 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1426 for (i=0, j=0; i<n_R; i++) { 1427 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1428 aux_array1[j++] = i; 1429 } 1430 } 1431 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1432 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1433 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1434 } 1435 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1436 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1437 } else { 1438 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1439 IS tis; 1440 PetscInt schur_size; 1441 1442 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1443 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1444 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1445 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1446 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1447 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1448 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1449 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1450 } 1451 } 1452 PetscFunctionReturn(0); 1453 } 1454 1455 1456 #undef __FUNCT__ 1457 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1458 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1459 { 1460 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1461 PC_IS *pcis = (PC_IS*)pc->data; 1462 PC pc_temp; 1463 Mat A_RR; 1464 MatReuse reuse; 1465 PetscScalar m_one = -1.0; 1466 PetscReal value; 1467 PetscInt n_D,n_R; 1468 PetscBool check_corr[2],issbaij; 1469 PetscErrorCode ierr; 1470 /* prefixes stuff */ 1471 char dir_prefix[256],neu_prefix[256],str_level[16]; 1472 size_t len; 1473 1474 PetscFunctionBegin; 1475 1476 /* compute prefixes */ 1477 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1478 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1479 if (!pcbddc->current_level) { 1480 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1481 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1482 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1483 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1484 } else { 1485 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1486 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1487 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1488 len -= 15; /* remove "pc_bddc_coarse_" */ 1489 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1490 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1491 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1492 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1493 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1494 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1495 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1496 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1497 } 1498 1499 /* DIRICHLET PROBLEM */ 1500 if (dirichlet) { 1501 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1502 if (pcbddc->local_mat->symmetric_set) { 1503 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1504 } 1505 /* Matrix for Dirichlet problem is pcis->A_II */ 1506 n_D = pcis->n - pcis->n_B; 1507 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1508 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1509 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1510 /* default */ 1511 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1512 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1513 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1514 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1515 if (issbaij) { 1516 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1517 } else { 1518 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1519 } 1520 /* Allow user's customization */ 1521 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1522 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1523 } 1524 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1525 if (sub_schurs->reuse_mumps) { 1526 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1527 1528 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1529 } 1530 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1531 if (!n_D) { 1532 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1533 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1534 } 1535 /* Set Up KSP for Dirichlet problem of BDDC */ 1536 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1537 /* set ksp_D into pcis data */ 1538 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1539 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1540 pcis->ksp_D = pcbddc->ksp_D; 1541 } 1542 1543 /* NEUMANN PROBLEM */ 1544 A_RR = 0; 1545 if (neumann) { 1546 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1547 PetscInt ibs,mbs; 1548 PetscBool issbaij; 1549 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1550 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1551 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1552 if (pcbddc->ksp_R) { /* already created ksp */ 1553 PetscInt nn_R; 1554 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1555 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1556 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1557 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1558 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1559 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1560 reuse = MAT_INITIAL_MATRIX; 1561 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1562 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1563 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1564 reuse = MAT_INITIAL_MATRIX; 1565 } else { /* safe to reuse the matrix */ 1566 reuse = MAT_REUSE_MATRIX; 1567 } 1568 } 1569 /* last check */ 1570 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1571 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1572 reuse = MAT_INITIAL_MATRIX; 1573 } 1574 } else { /* first time, so we need to create the matrix */ 1575 reuse = MAT_INITIAL_MATRIX; 1576 } 1577 /* extract A_RR */ 1578 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1579 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1580 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1581 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1582 if (matis->A == pcbddc->local_mat) { 1583 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1584 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1585 } else { 1586 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1587 } 1588 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1589 if (matis->A == pcbddc->local_mat) { 1590 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1591 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1592 } else { 1593 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1594 } 1595 } 1596 if (!sub_schurs->reuse_mumps) { 1597 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1598 if (pcbddc->local_mat->symmetric_set) { 1599 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1600 } 1601 } else { 1602 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1603 1604 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1605 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1606 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1607 } 1608 if (!pcbddc->ksp_R) { /* create object if not present */ 1609 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1610 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1611 /* default */ 1612 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1613 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1614 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1615 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1616 if (issbaij) { 1617 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1618 } else { 1619 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1620 } 1621 /* Allow user's customization */ 1622 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1623 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1624 } 1625 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1626 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1627 if (!n_R) { 1628 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1629 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1630 } 1631 /* Reuse MUMPS solver if it is present */ 1632 if (sub_schurs->reuse_mumps) { 1633 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1634 1635 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1636 } 1637 /* Set Up KSP for Neumann problem of BDDC */ 1638 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1639 } 1640 /* free Neumann problem's matrix */ 1641 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1642 1643 if (pcbddc->dbg_flag) { 1644 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1645 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1646 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1647 } 1648 1649 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 1650 check_corr[0] = check_corr[1] = PETSC_FALSE; 1651 if (pcbddc->NullSpace_corr[0]) { 1652 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 1653 } 1654 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 1655 check_corr[0] = PETSC_TRUE; 1656 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 1657 } 1658 if (neumann && pcbddc->NullSpace_corr[2]) { 1659 check_corr[1] = PETSC_TRUE; 1660 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 1661 } 1662 1663 /* check Dirichlet and Neumann solvers */ 1664 if (pcbddc->dbg_flag) { 1665 if (dirichlet) { /* Dirichlet */ 1666 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1667 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1668 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1669 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1670 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1671 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1672 if (check_corr[0]) { 1673 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 1674 } 1675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1676 } 1677 if (neumann) { /* Neumann */ 1678 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1679 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1680 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1681 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1682 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1683 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1684 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1685 if (check_corr[1]) { 1686 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 1687 } 1688 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1689 } 1690 } 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1696 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1697 { 1698 PetscErrorCode ierr; 1699 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1700 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1701 1702 PetscFunctionBegin; 1703 if (!sub_schurs->reuse_mumps) { 1704 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1705 } 1706 if (!pcbddc->switch_static) { 1707 if (applytranspose && pcbddc->local_auxmat1) { 1708 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1709 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1710 } 1711 if (!sub_schurs->reuse_mumps) { 1712 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1713 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1714 } else { 1715 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1716 1717 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1718 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1719 } 1720 } else { 1721 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1722 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1723 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1724 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1725 if (applytranspose && pcbddc->local_auxmat1) { 1726 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1727 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1728 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1729 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1730 } 1731 } 1732 if (!sub_schurs->reuse_mumps) { 1733 if (applytranspose) { 1734 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1735 } else { 1736 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1737 } 1738 } else { 1739 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1740 1741 if (applytranspose) { 1742 ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1743 } else { 1744 ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1745 } 1746 } 1747 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1748 if (!pcbddc->switch_static) { 1749 if (!sub_schurs->reuse_mumps) { 1750 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1751 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1752 } else { 1753 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1754 1755 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1756 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1757 } 1758 if (!applytranspose && pcbddc->local_auxmat1) { 1759 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1760 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1761 } 1762 } else { 1763 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1764 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1765 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1766 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1767 if (!applytranspose && pcbddc->local_auxmat1) { 1768 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1769 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1770 } 1771 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1772 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1773 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1774 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1775 } 1776 PetscFunctionReturn(0); 1777 } 1778 1779 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1782 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1783 { 1784 PetscErrorCode ierr; 1785 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1786 PC_IS* pcis = (PC_IS*) (pc->data); 1787 const PetscScalar zero = 0.0; 1788 1789 PetscFunctionBegin; 1790 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1791 if (applytranspose) { 1792 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1793 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1794 } else { 1795 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1796 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1797 } 1798 /* start communications from local primal nodes to rhs of coarse solver */ 1799 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1800 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1801 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1802 1803 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1804 /* TODO remove null space when doing multilevel */ 1805 if (pcbddc->coarse_ksp) { 1806 Vec rhs,sol; 1807 1808 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1809 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1810 if (applytranspose) { 1811 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1812 } else { 1813 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1814 } 1815 } 1816 1817 /* Local solution on R nodes */ 1818 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1819 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1820 } 1821 1822 /* communications from coarse sol to local primal nodes */ 1823 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1824 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1825 1826 /* Sum contributions from two levels */ 1827 if (applytranspose) { 1828 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1829 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1830 } else { 1831 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1832 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1833 } 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1839 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1840 { 1841 PetscErrorCode ierr; 1842 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1843 PetscScalar *array; 1844 Vec from,to; 1845 1846 PetscFunctionBegin; 1847 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1848 from = pcbddc->coarse_vec; 1849 to = pcbddc->vec1_P; 1850 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1851 Vec tvec; 1852 1853 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1854 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1855 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1856 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1857 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1858 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1859 } 1860 } else { /* from local to global -> put data in coarse right hand side */ 1861 from = pcbddc->vec1_P; 1862 to = pcbddc->coarse_vec; 1863 } 1864 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1865 PetscFunctionReturn(0); 1866 } 1867 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1870 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1871 { 1872 PetscErrorCode ierr; 1873 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1874 PetscScalar *array; 1875 Vec from,to; 1876 1877 PetscFunctionBegin; 1878 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1879 from = pcbddc->coarse_vec; 1880 to = pcbddc->vec1_P; 1881 } else { /* from local to global -> put data in coarse right hand side */ 1882 from = pcbddc->vec1_P; 1883 to = pcbddc->coarse_vec; 1884 } 1885 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1886 if (smode == SCATTER_FORWARD) { 1887 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1888 Vec tvec; 1889 1890 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1891 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1892 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1893 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1894 } 1895 } else { 1896 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1897 ierr = VecResetArray(from);CHKERRQ(ierr); 1898 } 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 1903 /* uncomment for testing purposes */ 1904 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1905 #undef __FUNCT__ 1906 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1907 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1908 { 1909 PetscErrorCode ierr; 1910 PC_IS* pcis = (PC_IS*)(pc->data); 1911 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1912 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1913 /* one and zero */ 1914 PetscScalar one=1.0,zero=0.0; 1915 /* space to store constraints and their local indices */ 1916 PetscScalar *constraints_data; 1917 PetscInt *constraints_idxs,*constraints_idxs_B; 1918 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1919 PetscInt *constraints_n; 1920 /* iterators */ 1921 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1922 /* BLAS integers */ 1923 PetscBLASInt lwork,lierr; 1924 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1925 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1926 /* reuse */ 1927 PetscInt olocal_primal_size,olocal_primal_size_cc; 1928 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1929 /* change of basis */ 1930 PetscBool qr_needed; 1931 PetscBT change_basis,qr_needed_idx; 1932 /* auxiliary stuff */ 1933 PetscInt *nnz,*is_indices; 1934 PetscInt ncc; 1935 /* some quantities */ 1936 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1937 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1938 1939 PetscFunctionBegin; 1940 /* Destroy Mat objects computed previously */ 1941 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1942 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1943 /* save info on constraints from previous setup (if any) */ 1944 olocal_primal_size = pcbddc->local_primal_size; 1945 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1946 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1947 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1948 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1949 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1950 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1951 1952 /* print some info */ 1953 if (pcbddc->dbg_flag) { 1954 IS vertices; 1955 PetscInt nv,nedges,nfaces; 1956 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1957 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1958 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1959 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1960 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1961 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1962 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1963 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1964 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1965 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1966 } 1967 1968 if (!pcbddc->adaptive_selection) { 1969 IS ISForVertices,*ISForFaces,*ISForEdges; 1970 MatNullSpace nearnullsp; 1971 const Vec *nearnullvecs; 1972 Vec *localnearnullsp; 1973 PetscScalar *array; 1974 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1975 PetscBool nnsp_has_cnst; 1976 /* LAPACK working arrays for SVD or POD */ 1977 PetscBool skip_lapack,boolforchange; 1978 PetscScalar *work; 1979 PetscReal *singular_vals; 1980 #if defined(PETSC_USE_COMPLEX) 1981 PetscReal *rwork; 1982 #endif 1983 #if defined(PETSC_MISSING_LAPACK_GESVD) 1984 PetscScalar *temp_basis,*correlation_mat; 1985 #else 1986 PetscBLASInt dummy_int=1; 1987 PetscScalar dummy_scalar=1.; 1988 #endif 1989 1990 /* Get index sets for faces, edges and vertices from graph */ 1991 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1992 /* free unneeded index sets */ 1993 if (!pcbddc->use_vertices) { 1994 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1995 } 1996 if (!pcbddc->use_edges) { 1997 for (i=0;i<n_ISForEdges;i++) { 1998 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1999 } 2000 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2001 n_ISForEdges = 0; 2002 } 2003 if (!pcbddc->use_faces) { 2004 for (i=0;i<n_ISForFaces;i++) { 2005 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2006 } 2007 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2008 n_ISForFaces = 0; 2009 } 2010 2011 /* check if near null space is attached to global mat */ 2012 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2013 if (nearnullsp) { 2014 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2015 /* remove any stored info */ 2016 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2017 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2018 /* store information for BDDC solver reuse */ 2019 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2020 pcbddc->onearnullspace = nearnullsp; 2021 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2022 for (i=0;i<nnsp_size;i++) { 2023 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2024 } 2025 } else { /* if near null space is not provided BDDC uses constants by default */ 2026 nnsp_size = 0; 2027 nnsp_has_cnst = PETSC_TRUE; 2028 } 2029 /* get max number of constraints on a single cc */ 2030 max_constraints = nnsp_size; 2031 if (nnsp_has_cnst) max_constraints++; 2032 2033 /* 2034 Evaluate maximum storage size needed by the procedure 2035 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2036 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2037 There can be multiple constraints per connected component 2038 */ 2039 n_vertices = 0; 2040 if (ISForVertices) { 2041 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2042 } 2043 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2044 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2045 2046 total_counts = n_ISForFaces+n_ISForEdges; 2047 total_counts *= max_constraints; 2048 total_counts += n_vertices; 2049 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2050 2051 total_counts = 0; 2052 max_size_of_constraint = 0; 2053 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2054 IS used_is; 2055 if (i<n_ISForEdges) { 2056 used_is = ISForEdges[i]; 2057 } else { 2058 used_is = ISForFaces[i-n_ISForEdges]; 2059 } 2060 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2061 total_counts += j; 2062 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2063 } 2064 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2065 2066 /* get local part of global near null space vectors */ 2067 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2068 for (k=0;k<nnsp_size;k++) { 2069 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2070 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2071 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 } 2073 2074 /* whether or not to skip lapack calls */ 2075 skip_lapack = PETSC_TRUE; 2076 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2077 2078 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2079 if (!skip_lapack) { 2080 PetscScalar temp_work; 2081 2082 #if defined(PETSC_MISSING_LAPACK_GESVD) 2083 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2084 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2085 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2086 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2087 #if defined(PETSC_USE_COMPLEX) 2088 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2089 #endif 2090 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2091 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2092 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2093 lwork = -1; 2094 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2095 #if !defined(PETSC_USE_COMPLEX) 2096 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2097 #else 2098 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2099 #endif 2100 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2101 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2102 #else /* on missing GESVD */ 2103 /* SVD */ 2104 PetscInt max_n,min_n; 2105 max_n = max_size_of_constraint; 2106 min_n = max_constraints; 2107 if (max_size_of_constraint < max_constraints) { 2108 min_n = max_size_of_constraint; 2109 max_n = max_constraints; 2110 } 2111 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2112 #if defined(PETSC_USE_COMPLEX) 2113 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2114 #endif 2115 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2116 lwork = -1; 2117 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2118 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2119 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2120 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2121 #if !defined(PETSC_USE_COMPLEX) 2122 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2123 #else 2124 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2125 #endif 2126 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2127 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2128 #endif /* on missing GESVD */ 2129 /* Allocate optimal workspace */ 2130 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2131 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2132 } 2133 /* Now we can loop on constraining sets */ 2134 total_counts = 0; 2135 constraints_idxs_ptr[0] = 0; 2136 constraints_data_ptr[0] = 0; 2137 /* vertices */ 2138 if (n_vertices) { 2139 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2140 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2141 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2142 for (i=0;i<n_vertices;i++) { 2143 constraints_n[total_counts] = 1; 2144 constraints_data[total_counts] = 1.0; 2145 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2146 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2147 total_counts++; 2148 } 2149 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2150 PetscBool used_vertex; 2151 for (i=0;i<n_vertices;i++) { 2152 used_vertex = PETSC_FALSE; 2153 k = 0; 2154 while (!used_vertex && k<nnsp_size) { 2155 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2156 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2157 constraints_n[total_counts] = 1; 2158 constraints_idxs[total_counts] = is_indices[i]; 2159 constraints_data[total_counts] = 1.0; 2160 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2161 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2162 total_counts++; 2163 used_vertex = PETSC_TRUE; 2164 } 2165 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2166 k++; 2167 } 2168 } 2169 } 2170 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2171 n_vertices = total_counts; 2172 } 2173 2174 /* edges and faces */ 2175 total_counts_cc = total_counts; 2176 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2177 IS used_is; 2178 PetscBool idxs_copied = PETSC_FALSE; 2179 2180 if (ncc<n_ISForEdges) { 2181 used_is = ISForEdges[ncc]; 2182 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2183 } else { 2184 used_is = ISForFaces[ncc-n_ISForEdges]; 2185 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2186 } 2187 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2188 2189 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2190 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2191 /* change of basis should not be performed on local periodic nodes */ 2192 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2193 if (nnsp_has_cnst) { 2194 PetscScalar quad_value; 2195 2196 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2197 idxs_copied = PETSC_TRUE; 2198 2199 if (!pcbddc->use_nnsp_true) { 2200 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2201 } else { 2202 quad_value = 1.0; 2203 } 2204 for (j=0;j<size_of_constraint;j++) { 2205 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2206 } 2207 temp_constraints++; 2208 total_counts++; 2209 } 2210 for (k=0;k<nnsp_size;k++) { 2211 PetscReal real_value; 2212 PetscScalar *ptr_to_data; 2213 2214 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2215 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2216 for (j=0;j<size_of_constraint;j++) { 2217 ptr_to_data[j] = array[is_indices[j]]; 2218 } 2219 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2220 /* check if array is null on the connected component */ 2221 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2222 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2223 if (real_value > 0.0) { /* keep indices and values */ 2224 temp_constraints++; 2225 total_counts++; 2226 if (!idxs_copied) { 2227 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2228 idxs_copied = PETSC_TRUE; 2229 } 2230 } 2231 } 2232 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2233 valid_constraints = temp_constraints; 2234 if (!pcbddc->use_nnsp_true && temp_constraints) { 2235 if (temp_constraints == 1) { /* just normalize the constraint */ 2236 PetscScalar norm,*ptr_to_data; 2237 2238 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2239 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2240 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2241 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2242 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2243 } else { /* perform SVD */ 2244 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2245 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2246 2247 #if defined(PETSC_MISSING_LAPACK_GESVD) 2248 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2249 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2250 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2251 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2252 from that computed using LAPACKgesvd 2253 -> This is due to a different computation of eigenvectors in LAPACKheev 2254 -> The quality of the POD-computed basis will be the same */ 2255 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2256 /* Store upper triangular part of correlation matrix */ 2257 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2258 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2259 for (j=0;j<temp_constraints;j++) { 2260 for (k=0;k<j+1;k++) { 2261 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2262 } 2263 } 2264 /* compute eigenvalues and eigenvectors of correlation matrix */ 2265 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2266 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2267 #if !defined(PETSC_USE_COMPLEX) 2268 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2269 #else 2270 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2271 #endif 2272 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2273 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2274 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2275 j = 0; 2276 while (j < temp_constraints && singular_vals[j] < tol) j++; 2277 total_counts = total_counts-j; 2278 valid_constraints = temp_constraints-j; 2279 /* scale and copy POD basis into used quadrature memory */ 2280 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2281 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2282 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2283 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2284 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2285 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2286 if (j<temp_constraints) { 2287 PetscInt ii; 2288 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2289 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2290 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2291 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2292 for (k=0;k<temp_constraints-j;k++) { 2293 for (ii=0;ii<size_of_constraint;ii++) { 2294 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2295 } 2296 } 2297 } 2298 #else /* on missing GESVD */ 2299 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2300 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2301 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2302 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2303 #if !defined(PETSC_USE_COMPLEX) 2304 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2305 #else 2306 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2307 #endif 2308 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2309 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2310 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2311 k = temp_constraints; 2312 if (k > size_of_constraint) k = size_of_constraint; 2313 j = 0; 2314 while (j < k && singular_vals[k-j-1] < tol) j++; 2315 valid_constraints = k-j; 2316 total_counts = total_counts-temp_constraints+valid_constraints; 2317 #endif /* on missing GESVD */ 2318 } 2319 } 2320 /* update pointers information */ 2321 if (valid_constraints) { 2322 constraints_n[total_counts_cc] = valid_constraints; 2323 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2324 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2325 /* set change_of_basis flag */ 2326 if (boolforchange) { 2327 PetscBTSet(change_basis,total_counts_cc); 2328 } 2329 total_counts_cc++; 2330 } 2331 } 2332 /* free workspace */ 2333 if (!skip_lapack) { 2334 ierr = PetscFree(work);CHKERRQ(ierr); 2335 #if defined(PETSC_USE_COMPLEX) 2336 ierr = PetscFree(rwork);CHKERRQ(ierr); 2337 #endif 2338 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2339 #if defined(PETSC_MISSING_LAPACK_GESVD) 2340 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2341 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2342 #endif 2343 } 2344 for (k=0;k<nnsp_size;k++) { 2345 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2346 } 2347 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2348 /* free index sets of faces, edges and vertices */ 2349 for (i=0;i<n_ISForFaces;i++) { 2350 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2351 } 2352 if (n_ISForFaces) { 2353 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2354 } 2355 for (i=0;i<n_ISForEdges;i++) { 2356 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2357 } 2358 if (n_ISForEdges) { 2359 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2360 } 2361 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2362 } else { 2363 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2364 2365 total_counts = 0; 2366 n_vertices = 0; 2367 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2368 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2369 } 2370 max_constraints = 0; 2371 total_counts_cc = 0; 2372 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2373 total_counts += pcbddc->adaptive_constraints_n[i]; 2374 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2375 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2376 } 2377 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2378 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2379 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2380 constraints_data = pcbddc->adaptive_constraints_data; 2381 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2382 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2383 total_counts_cc = 0; 2384 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2385 if (pcbddc->adaptive_constraints_n[i]) { 2386 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2387 } 2388 } 2389 #if 0 2390 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2391 for (i=0;i<total_counts_cc;i++) { 2392 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2393 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2394 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2395 printf(" %d",constraints_idxs[j]); 2396 } 2397 printf("\n"); 2398 printf("number of cc: %d\n",constraints_n[i]); 2399 } 2400 for (i=0;i<n_vertices;i++) { 2401 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2402 } 2403 for (i=0;i<sub_schurs->n_subs;i++) { 2404 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2405 } 2406 #endif 2407 2408 max_size_of_constraint = 0; 2409 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2410 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2411 /* Change of basis */ 2412 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2413 if (pcbddc->use_change_of_basis) { 2414 for (i=0;i<sub_schurs->n_subs;i++) { 2415 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2416 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2417 } 2418 } 2419 } 2420 } 2421 pcbddc->local_primal_size = total_counts; 2422 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2423 2424 /* map constraints_idxs in boundary numbering */ 2425 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2426 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 2427 2428 /* Create constraint matrix */ 2429 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2430 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2431 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2432 2433 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2434 /* determine if a QR strategy is needed for change of basis */ 2435 qr_needed = PETSC_FALSE; 2436 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2437 total_primal_vertices=0; 2438 pcbddc->local_primal_size_cc = 0; 2439 for (i=0;i<total_counts_cc;i++) { 2440 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2441 if (size_of_constraint == 1) { 2442 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2443 pcbddc->local_primal_size_cc += 1; 2444 } else if (PetscBTLookup(change_basis,i)) { 2445 for (k=0;k<constraints_n[i];k++) { 2446 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2447 } 2448 pcbddc->local_primal_size_cc += constraints_n[i]; 2449 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2450 PetscBTSet(qr_needed_idx,i); 2451 qr_needed = PETSC_TRUE; 2452 } 2453 } else { 2454 pcbddc->local_primal_size_cc += 1; 2455 } 2456 } 2457 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2458 pcbddc->n_vertices = total_primal_vertices; 2459 /* permute indices in order to have a sorted set of vertices */ 2460 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2461 2462 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2463 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2464 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2465 2466 /* nonzero structure of constraint matrix */ 2467 /* and get reference dof for local constraints */ 2468 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2469 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2470 2471 j = total_primal_vertices; 2472 total_counts = total_primal_vertices; 2473 cum = total_primal_vertices; 2474 for (i=n_vertices;i<total_counts_cc;i++) { 2475 if (!PetscBTLookup(change_basis,i)) { 2476 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2477 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2478 cum++; 2479 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2480 for (k=0;k<constraints_n[i];k++) { 2481 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2482 nnz[j+k] = size_of_constraint; 2483 } 2484 j += constraints_n[i]; 2485 } 2486 } 2487 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2488 ierr = PetscFree(nnz);CHKERRQ(ierr); 2489 2490 /* set values in constraint matrix */ 2491 for (i=0;i<total_primal_vertices;i++) { 2492 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2493 } 2494 total_counts = total_primal_vertices; 2495 for (i=n_vertices;i<total_counts_cc;i++) { 2496 if (!PetscBTLookup(change_basis,i)) { 2497 PetscInt *cols; 2498 2499 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2500 cols = constraints_idxs+constraints_idxs_ptr[i]; 2501 for (k=0;k<constraints_n[i];k++) { 2502 PetscInt row = total_counts+k; 2503 PetscScalar *vals; 2504 2505 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2506 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2507 } 2508 total_counts += constraints_n[i]; 2509 } 2510 } 2511 /* assembling */ 2512 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2513 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2514 2515 /* 2516 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2517 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2518 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 2519 */ 2520 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2521 if (pcbddc->use_change_of_basis) { 2522 /* dual and primal dofs on a single cc */ 2523 PetscInt dual_dofs,primal_dofs; 2524 /* working stuff for GEQRF */ 2525 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2526 PetscBLASInt lqr_work; 2527 /* working stuff for UNGQR */ 2528 PetscScalar *gqr_work,lgqr_work_t; 2529 PetscBLASInt lgqr_work; 2530 /* working stuff for TRTRS */ 2531 PetscScalar *trs_rhs; 2532 PetscBLASInt Blas_NRHS; 2533 /* pointers for values insertion into change of basis matrix */ 2534 PetscInt *start_rows,*start_cols; 2535 PetscScalar *start_vals; 2536 /* working stuff for values insertion */ 2537 PetscBT is_primal; 2538 PetscInt *aux_primal_numbering_B; 2539 /* matrix sizes */ 2540 PetscInt global_size,local_size; 2541 /* temporary change of basis */ 2542 Mat localChangeOfBasisMatrix; 2543 /* extra space for debugging */ 2544 PetscScalar *dbg_work; 2545 2546 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2547 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2548 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2549 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2550 /* nonzeros for local mat */ 2551 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2552 for (i=0;i<pcis->n;i++) nnz[i]=1; 2553 for (i=n_vertices;i<total_counts_cc;i++) { 2554 if (PetscBTLookup(change_basis,i)) { 2555 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2556 if (PetscBTLookup(qr_needed_idx,i)) { 2557 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2558 } else { 2559 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2560 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2561 } 2562 } 2563 } 2564 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2565 ierr = PetscFree(nnz);CHKERRQ(ierr); 2566 /* Set initial identity in the matrix */ 2567 for (i=0;i<pcis->n;i++) { 2568 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2569 } 2570 2571 if (pcbddc->dbg_flag) { 2572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2573 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2574 } 2575 2576 2577 /* Now we loop on the constraints which need a change of basis */ 2578 /* 2579 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2580 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2581 2582 Basic blocks of change of basis matrix T computed by 2583 2584 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2585 2586 | 1 0 ... 0 s_1/S | 2587 | 0 1 ... 0 s_2/S | 2588 | ... | 2589 | 0 ... 1 s_{n-1}/S | 2590 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2591 2592 with S = \sum_{i=1}^n s_i^2 2593 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2594 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2595 2596 - QR decomposition of constraints otherwise 2597 */ 2598 if (qr_needed) { 2599 /* space to store Q */ 2600 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2601 /* first we issue queries for optimal work */ 2602 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2603 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2604 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2605 lqr_work = -1; 2606 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2607 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2608 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2609 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2610 lgqr_work = -1; 2611 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2612 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2613 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2614 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2615 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2616 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2617 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2618 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2619 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2620 /* array to store scaling factors for reflectors */ 2621 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2622 /* array to store rhs and solution of triangular solver */ 2623 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2624 /* allocating workspace for check */ 2625 if (pcbddc->dbg_flag) { 2626 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2627 } 2628 } 2629 /* array to store whether a node is primal or not */ 2630 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2631 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2632 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2633 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 2634 for (i=0;i<total_primal_vertices;i++) { 2635 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2636 } 2637 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2638 2639 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2640 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2641 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2642 if (PetscBTLookup(change_basis,total_counts)) { 2643 /* get constraint info */ 2644 primal_dofs = constraints_n[total_counts]; 2645 dual_dofs = size_of_constraint-primal_dofs; 2646 2647 if (pcbddc->dbg_flag) { 2648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2649 } 2650 2651 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2652 2653 /* copy quadrature constraints for change of basis check */ 2654 if (pcbddc->dbg_flag) { 2655 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2656 } 2657 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2658 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2659 2660 /* compute QR decomposition of constraints */ 2661 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2662 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2663 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2664 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2665 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2666 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2667 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2668 2669 /* explictly compute R^-T */ 2670 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2671 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2672 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2673 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2674 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2675 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2676 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2677 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2678 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2679 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2680 2681 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2682 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2683 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2684 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2685 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2686 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2687 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2688 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2689 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2690 2691 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2692 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2693 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2694 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2695 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2696 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2697 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2698 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2699 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2700 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2701 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2702 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2703 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2704 2705 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2706 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2707 /* insert cols for primal dofs */ 2708 for (j=0;j<primal_dofs;j++) { 2709 start_vals = &qr_basis[j*size_of_constraint]; 2710 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2711 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2712 } 2713 /* insert cols for dual dofs */ 2714 for (j=0,k=0;j<dual_dofs;k++) { 2715 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2716 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2717 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2718 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2719 j++; 2720 } 2721 } 2722 2723 /* check change of basis */ 2724 if (pcbddc->dbg_flag) { 2725 PetscInt ii,jj; 2726 PetscBool valid_qr=PETSC_TRUE; 2727 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2728 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2729 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2733 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2734 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2735 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2736 for (jj=0;jj<size_of_constraint;jj++) { 2737 for (ii=0;ii<primal_dofs;ii++) { 2738 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2739 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2740 } 2741 } 2742 if (!valid_qr) { 2743 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2744 for (jj=0;jj<size_of_constraint;jj++) { 2745 for (ii=0;ii<primal_dofs;ii++) { 2746 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2747 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2748 } 2749 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2750 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2751 } 2752 } 2753 } 2754 } else { 2755 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2756 } 2757 } 2758 } else { /* simple transformation block */ 2759 PetscInt row,col; 2760 PetscScalar val,norm; 2761 2762 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2763 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2764 for (j=0;j<size_of_constraint;j++) { 2765 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2766 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2767 if (!PetscBTLookup(is_primal,row_B)) { 2768 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2769 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2770 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2771 } else { 2772 for (k=0;k<size_of_constraint;k++) { 2773 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2774 if (row != col) { 2775 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2776 } else { 2777 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2778 } 2779 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2780 } 2781 } 2782 } 2783 if (pcbddc->dbg_flag) { 2784 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2785 } 2786 } 2787 } else { 2788 if (pcbddc->dbg_flag) { 2789 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2790 } 2791 } 2792 } 2793 2794 /* free workspace */ 2795 if (qr_needed) { 2796 if (pcbddc->dbg_flag) { 2797 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2798 } 2799 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2800 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2801 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2802 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2803 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2804 } 2805 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2806 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2807 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2808 2809 /* assembling of global change of variable */ 2810 { 2811 Mat tmat; 2812 PetscInt bs; 2813 2814 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2815 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2816 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2817 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2818 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2819 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2820 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2821 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2822 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2823 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2824 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2825 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2826 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2827 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2828 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2829 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2830 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2831 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2832 } 2833 /* check */ 2834 if (pcbddc->dbg_flag) { 2835 PetscReal error; 2836 Vec x,x_change; 2837 2838 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2839 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2840 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2841 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2842 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2843 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2844 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2845 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2846 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2847 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2848 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2849 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2850 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2851 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2852 ierr = VecDestroy(&x);CHKERRQ(ierr); 2853 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2854 } 2855 2856 /* adapt sub_schurs computed (if any) */ 2857 if (pcbddc->use_deluxe_scaling) { 2858 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2859 if (sub_schurs->S_Ej_all) { 2860 Mat S_new,tmat; 2861 IS is_all_N; 2862 2863 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2864 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2865 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2866 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2867 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2868 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2869 sub_schurs->S_Ej_all = S_new; 2870 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2871 if (sub_schurs->sum_S_Ej_all) { 2872 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2873 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2874 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2875 sub_schurs->sum_S_Ej_all = S_new; 2876 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2877 } 2878 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2879 } 2880 } 2881 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2882 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2883 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2884 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2885 } 2886 2887 /* set up change of basis context */ 2888 if (pcbddc->ChangeOfBasisMatrix) { 2889 PCBDDCChange_ctx change_ctx; 2890 2891 if (!pcbddc->new_global_mat) { 2892 PetscInt global_size,local_size; 2893 2894 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2895 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2896 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2897 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2898 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2899 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2900 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2901 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2902 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2903 } else { 2904 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2905 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2906 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2907 } 2908 if (!pcbddc->user_ChangeOfBasisMatrix) { 2909 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2910 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2911 } else { 2912 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2913 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2914 } 2915 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2916 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2917 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2918 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2919 } 2920 2921 /* check if a new primal space has been introduced */ 2922 pcbddc->new_primal_space_local = PETSC_TRUE; 2923 if (olocal_primal_size == pcbddc->local_primal_size) { 2924 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2925 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2926 if (!pcbddc->new_primal_space_local) { 2927 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2928 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2929 } 2930 } 2931 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2932 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2933 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2934 2935 /* flush dbg viewer */ 2936 if (pcbddc->dbg_flag) { 2937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2938 } 2939 2940 /* free workspace */ 2941 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2942 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2943 if (!pcbddc->adaptive_selection) { 2944 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2945 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2946 } else { 2947 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2948 pcbddc->adaptive_constraints_idxs_ptr, 2949 pcbddc->adaptive_constraints_data_ptr, 2950 pcbddc->adaptive_constraints_idxs, 2951 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2952 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2953 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2954 } 2955 PetscFunctionReturn(0); 2956 } 2957 2958 #undef __FUNCT__ 2959 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2960 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2961 { 2962 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2963 PC_IS *pcis = (PC_IS*)pc->data; 2964 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2965 PetscInt ierr,i,vertex_size,N; 2966 PetscViewer viewer=pcbddc->dbg_viewer; 2967 2968 PetscFunctionBegin; 2969 /* Reset previously computed graph */ 2970 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2971 /* Init local Graph struct */ 2972 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2973 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 2974 2975 /* Check validity of the csr graph passed in by the user */ 2976 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2977 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2978 } 2979 2980 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2981 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2982 PetscInt *xadj,*adjncy; 2983 PetscInt nvtxs; 2984 PetscBool flg_row=PETSC_FALSE; 2985 2986 if (pcbddc->use_local_adj) { 2987 2988 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2989 if (flg_row) { 2990 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2991 pcbddc->computed_rowadj = PETSC_TRUE; 2992 } 2993 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2994 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 2995 IS is_dummy; 2996 ISLocalToGlobalMapping l2gmap_dummy; 2997 PetscInt j,sum; 2998 PetscInt *cxadj,*cadjncy; 2999 const PetscInt *idxs; 3000 PCBDDCGraph graph; 3001 PetscBT is_on_boundary; 3002 3003 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3004 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3005 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3006 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3007 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3008 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3009 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3010 if (flg_row) { 3011 graph->xadj = xadj; 3012 graph->adjncy = adjncy; 3013 } 3014 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3015 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3016 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3017 3018 if (pcbddc->dbg_flag) { 3019 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3020 for (i=0;i<graph->ncc;i++) { 3021 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3022 } 3023 } 3024 3025 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3026 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3027 for (i=0;i<pcis->n_B;i++) { 3028 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3029 } 3030 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3031 3032 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3033 sum = 0; 3034 for (i=0;i<graph->ncc;i++) { 3035 PetscInt sizecc = 0; 3036 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3037 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3038 sizecc++; 3039 } 3040 } 3041 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3042 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3043 cxadj[graph->queue[j]] = sizecc; 3044 } 3045 } 3046 sum += sizecc*sizecc; 3047 } 3048 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3049 sum = 0; 3050 for (i=0;i<pcis->n;i++) { 3051 PetscInt temp = cxadj[i]; 3052 cxadj[i] = sum; 3053 sum += temp; 3054 } 3055 cxadj[pcis->n] = sum; 3056 for (i=0;i<graph->ncc;i++) { 3057 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3058 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3059 PetscInt k,sizecc = 0; 3060 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3061 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3062 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3063 sizecc++; 3064 } 3065 } 3066 } 3067 } 3068 } 3069 if (sum) { 3070 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3071 } else { 3072 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3073 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3074 } 3075 graph->xadj = 0; 3076 graph->adjncy = 0; 3077 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3078 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3079 } 3080 } 3081 if (pcbddc->dbg_flag) { 3082 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3083 } 3084 3085 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3086 vertex_size = 1; 3087 if (pcbddc->user_provided_isfordofs) { 3088 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3089 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3090 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3091 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3092 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3093 } 3094 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3095 pcbddc->n_ISForDofs = 0; 3096 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3097 } 3098 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3099 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3100 } else { 3101 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3102 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3103 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3104 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3105 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3106 } 3107 } 3108 } 3109 3110 /* Setup of Graph */ 3111 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3112 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3113 } 3114 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3115 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3116 } 3117 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3118 3119 /* Graph's connected components analysis */ 3120 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3121 3122 /* print some info to stdout */ 3123 if (pcbddc->dbg_flag) { 3124 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3125 } 3126 3127 /* mark topography has done */ 3128 pcbddc->recompute_topography = PETSC_FALSE; 3129 PetscFunctionReturn(0); 3130 } 3131 3132 #undef __FUNCT__ 3133 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3134 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3135 { 3136 PetscInt i,j; 3137 PetscScalar *alphas; 3138 PetscErrorCode ierr; 3139 3140 PetscFunctionBegin; 3141 /* this implements stabilized Gram-Schmidt */ 3142 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3143 for (i=0;i<n;i++) { 3144 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3145 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3146 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3147 } 3148 ierr = PetscFree(alphas);CHKERRQ(ierr); 3149 PetscFunctionReturn(0); 3150 } 3151 3152 #undef __FUNCT__ 3153 #define __FUNCT__ "MatISGetSubassemblingPattern" 3154 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3155 { 3156 IS ranks_send_to; 3157 PetscInt n_neighs,*neighs,*n_shared,**shared; 3158 PetscMPIInt size,rank,color; 3159 PetscInt *xadj,*adjncy; 3160 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3161 PetscInt i,local_size,threshold=0; 3162 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3163 PetscSubcomm subcomm; 3164 PetscErrorCode ierr; 3165 3166 PetscFunctionBegin; 3167 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3168 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3169 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3170 3171 /* Get info on mapping */ 3172 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3173 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3174 3175 /* build local CSR graph of subdomains' connectivity */ 3176 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3177 xadj[0] = 0; 3178 xadj[1] = PetscMax(n_neighs-1,0); 3179 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3180 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3181 3182 if (threshold) { 3183 PetscInt xadj_count = 0; 3184 for (i=1;i<n_neighs;i++) { 3185 if (n_shared[i] > threshold) { 3186 adjncy[xadj_count] = neighs[i]; 3187 adjncy_wgt[xadj_count] = n_shared[i]; 3188 xadj_count++; 3189 } 3190 } 3191 xadj[1] = xadj_count; 3192 } else { 3193 if (xadj[1]) { 3194 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3195 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3196 } 3197 } 3198 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3199 if (use_square) { 3200 for (i=0;i<xadj[1];i++) { 3201 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3202 } 3203 } 3204 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3205 3206 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3207 3208 /* 3209 Restrict work on active processes only. 3210 */ 3211 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3212 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3213 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3214 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3215 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3216 if (color) { 3217 ierr = PetscFree(xadj);CHKERRQ(ierr); 3218 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3219 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3220 } else { 3221 Mat subdomain_adj; 3222 IS new_ranks,new_ranks_contig; 3223 MatPartitioning partitioner; 3224 PetscInt prank,rstart=0,rend=0; 3225 PetscInt *is_indices,*oldranks; 3226 PetscBool aggregate; 3227 3228 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3229 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3230 prank = rank; 3231 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3232 /* 3233 for (i=0;i<size;i++) { 3234 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3235 } 3236 */ 3237 for (i=0;i<xadj[1];i++) { 3238 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3239 } 3240 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3241 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3242 if (aggregate) { 3243 PetscInt lrows,row,ncols,*cols; 3244 PetscMPIInt nrank; 3245 PetscScalar *vals; 3246 3247 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3248 lrows = 0; 3249 if (nrank<redprocs) { 3250 lrows = size/redprocs; 3251 if (nrank<size%redprocs) lrows++; 3252 } 3253 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3254 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3255 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3256 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3257 row = nrank; 3258 ncols = xadj[1]-xadj[0]; 3259 cols = adjncy; 3260 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3261 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3262 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3263 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 ierr = PetscFree(xadj);CHKERRQ(ierr); 3266 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3267 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3268 ierr = PetscFree(vals);CHKERRQ(ierr); 3269 } else { 3270 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3271 } 3272 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3273 3274 /* Partition */ 3275 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3276 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3277 if (use_vwgt) { 3278 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3279 v_wgt[0] = local_size; 3280 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3281 } 3282 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3283 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3284 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3285 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3286 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3287 3288 /* renumber new_ranks to avoid "holes" in new set of processors */ 3289 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3290 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3291 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3292 if (!redprocs) { 3293 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3294 } else { 3295 PetscInt idxs[1]; 3296 PetscMPIInt tag; 3297 MPI_Request *reqs; 3298 3299 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3300 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3301 for (i=rstart;i<rend;i++) { 3302 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3303 } 3304 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3305 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3306 ierr = PetscFree(reqs);CHKERRQ(ierr); 3307 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3308 } 3309 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3310 /* clean up */ 3311 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3312 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3313 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3314 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3315 } 3316 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3317 3318 /* assemble parallel IS for sends */ 3319 i = 1; 3320 if (color) i=0; 3321 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3322 /* get back IS */ 3323 *is_sends = ranks_send_to; 3324 PetscFunctionReturn(0); 3325 } 3326 3327 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3328 3329 #undef __FUNCT__ 3330 #define __FUNCT__ "MatISSubassemble" 3331 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3332 { 3333 Mat local_mat; 3334 IS is_sends_internal; 3335 PetscInt rows,cols,new_local_rows; 3336 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3337 PetscBool ismatis,isdense,newisdense,destroy_mat; 3338 ISLocalToGlobalMapping l2gmap; 3339 PetscInt* l2gmap_indices; 3340 const PetscInt* is_indices; 3341 MatType new_local_type; 3342 /* buffers */ 3343 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3344 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3345 PetscInt *recv_buffer_idxs_local; 3346 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3347 /* MPI */ 3348 MPI_Comm comm,comm_n; 3349 PetscSubcomm subcomm; 3350 PetscMPIInt n_sends,n_recvs,commsize; 3351 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3352 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3353 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3354 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3355 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3356 PetscErrorCode ierr; 3357 3358 PetscFunctionBegin; 3359 /* TODO: add missing checks */ 3360 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3361 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3362 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3363 PetscValidLogicalCollectiveInt(mat,nis,7); 3364 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3365 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3366 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3367 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3368 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3369 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3370 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3371 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3372 PetscInt mrows,mcols,mnrows,mncols; 3373 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3374 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3375 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3376 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3377 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3378 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3379 } 3380 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3381 PetscValidLogicalCollectiveInt(mat,bs,0); 3382 /* prepare IS for sending if not provided */ 3383 if (!is_sends) { 3384 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3385 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3386 } else { 3387 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3388 is_sends_internal = is_sends; 3389 } 3390 3391 /* get comm */ 3392 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3393 3394 /* compute number of sends */ 3395 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3396 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3397 3398 /* compute number of receives */ 3399 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3400 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3401 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3402 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3403 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3404 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3405 ierr = PetscFree(iflags);CHKERRQ(ierr); 3406 3407 /* restrict comm if requested */ 3408 subcomm = 0; 3409 destroy_mat = PETSC_FALSE; 3410 if (restrict_comm) { 3411 PetscMPIInt color,subcommsize; 3412 3413 color = 0; 3414 if (restrict_full) { 3415 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3416 } else { 3417 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3418 } 3419 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3420 subcommsize = commsize - subcommsize; 3421 /* check if reuse has been requested */ 3422 if (reuse == MAT_REUSE_MATRIX) { 3423 if (*mat_n) { 3424 PetscMPIInt subcommsize2; 3425 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3426 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3427 comm_n = PetscObjectComm((PetscObject)*mat_n); 3428 } else { 3429 comm_n = PETSC_COMM_SELF; 3430 } 3431 } else { /* MAT_INITIAL_MATRIX */ 3432 PetscMPIInt rank; 3433 3434 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3435 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3436 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3437 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3438 comm_n = PetscSubcommChild(subcomm); 3439 } 3440 /* flag to destroy *mat_n if not significative */ 3441 if (color) destroy_mat = PETSC_TRUE; 3442 } else { 3443 comm_n = comm; 3444 } 3445 3446 /* prepare send/receive buffers */ 3447 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3448 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3449 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3450 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3451 if (nis) { 3452 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3453 } 3454 3455 /* Get data from local matrices */ 3456 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3457 /* TODO: See below some guidelines on how to prepare the local buffers */ 3458 /* 3459 send_buffer_vals should contain the raw values of the local matrix 3460 send_buffer_idxs should contain: 3461 - MatType_PRIVATE type 3462 - PetscInt size_of_l2gmap 3463 - PetscInt global_row_indices[size_of_l2gmap] 3464 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3465 */ 3466 else { 3467 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3468 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3469 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3470 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3471 send_buffer_idxs[1] = i; 3472 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3473 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3474 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3475 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3476 for (i=0;i<n_sends;i++) { 3477 ilengths_vals[is_indices[i]] = len*len; 3478 ilengths_idxs[is_indices[i]] = len+2; 3479 } 3480 } 3481 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3482 /* additional is (if any) */ 3483 if (nis) { 3484 PetscMPIInt psum; 3485 PetscInt j; 3486 for (j=0,psum=0;j<nis;j++) { 3487 PetscInt plen; 3488 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3489 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3490 psum += len+1; /* indices + lenght */ 3491 } 3492 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3493 for (j=0,psum=0;j<nis;j++) { 3494 PetscInt plen; 3495 const PetscInt *is_array_idxs; 3496 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3497 send_buffer_idxs_is[psum] = plen; 3498 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3499 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3500 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3501 psum += plen+1; /* indices + lenght */ 3502 } 3503 for (i=0;i<n_sends;i++) { 3504 ilengths_idxs_is[is_indices[i]] = psum; 3505 } 3506 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3507 } 3508 3509 buf_size_idxs = 0; 3510 buf_size_vals = 0; 3511 buf_size_idxs_is = 0; 3512 for (i=0;i<n_recvs;i++) { 3513 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3514 buf_size_vals += (PetscInt)olengths_vals[i]; 3515 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3516 } 3517 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3518 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3519 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3520 3521 /* get new tags for clean communications */ 3522 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3523 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3524 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3525 3526 /* allocate for requests */ 3527 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3528 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3529 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3530 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3531 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3532 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3533 3534 /* communications */ 3535 ptr_idxs = recv_buffer_idxs; 3536 ptr_vals = recv_buffer_vals; 3537 ptr_idxs_is = recv_buffer_idxs_is; 3538 for (i=0;i<n_recvs;i++) { 3539 source_dest = onodes[i]; 3540 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3541 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3542 ptr_idxs += olengths_idxs[i]; 3543 ptr_vals += olengths_vals[i]; 3544 if (nis) { 3545 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3546 ptr_idxs_is += olengths_idxs_is[i]; 3547 } 3548 } 3549 for (i=0;i<n_sends;i++) { 3550 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3551 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3552 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3553 if (nis) { 3554 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3555 } 3556 } 3557 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3558 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3559 3560 /* assemble new l2g map */ 3561 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3562 ptr_idxs = recv_buffer_idxs; 3563 new_local_rows = 0; 3564 for (i=0;i<n_recvs;i++) { 3565 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3566 ptr_idxs += olengths_idxs[i]; 3567 } 3568 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3569 ptr_idxs = recv_buffer_idxs; 3570 new_local_rows = 0; 3571 for (i=0;i<n_recvs;i++) { 3572 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3573 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3574 ptr_idxs += olengths_idxs[i]; 3575 } 3576 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3577 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3578 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3579 3580 /* infer new local matrix type from received local matrices type */ 3581 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3582 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3583 if (n_recvs) { 3584 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3585 ptr_idxs = recv_buffer_idxs; 3586 for (i=0;i<n_recvs;i++) { 3587 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3588 new_local_type_private = MATAIJ_PRIVATE; 3589 break; 3590 } 3591 ptr_idxs += olengths_idxs[i]; 3592 } 3593 switch (new_local_type_private) { 3594 case MATDENSE_PRIVATE: 3595 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3596 new_local_type = MATSEQAIJ; 3597 bs = 1; 3598 } else { /* if I receive only 1 dense matrix */ 3599 new_local_type = MATSEQDENSE; 3600 bs = 1; 3601 } 3602 break; 3603 case MATAIJ_PRIVATE: 3604 new_local_type = MATSEQAIJ; 3605 bs = 1; 3606 break; 3607 case MATBAIJ_PRIVATE: 3608 new_local_type = MATSEQBAIJ; 3609 break; 3610 case MATSBAIJ_PRIVATE: 3611 new_local_type = MATSEQSBAIJ; 3612 break; 3613 default: 3614 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3615 break; 3616 } 3617 } else { /* by default, new_local_type is seqdense */ 3618 new_local_type = MATSEQDENSE; 3619 bs = 1; 3620 } 3621 3622 /* create MATIS object if needed */ 3623 if (reuse == MAT_INITIAL_MATRIX) { 3624 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3625 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3626 } else { 3627 /* it also destroys the local matrices */ 3628 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3629 } 3630 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3631 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3632 3633 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3634 3635 /* Global to local map of received indices */ 3636 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3637 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3638 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3639 3640 /* restore attributes -> type of incoming data and its size */ 3641 buf_size_idxs = 0; 3642 for (i=0;i<n_recvs;i++) { 3643 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3644 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3645 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3646 } 3647 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3648 3649 /* set preallocation */ 3650 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3651 if (!newisdense) { 3652 PetscInt *new_local_nnz=0; 3653 3654 ptr_idxs = recv_buffer_idxs_local; 3655 if (n_recvs) { 3656 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3657 } 3658 for (i=0;i<n_recvs;i++) { 3659 PetscInt j; 3660 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3661 for (j=0;j<*(ptr_idxs+1);j++) { 3662 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3663 } 3664 } else { 3665 /* TODO */ 3666 } 3667 ptr_idxs += olengths_idxs[i]; 3668 } 3669 if (new_local_nnz) { 3670 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3671 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3672 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3673 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3674 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3675 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3676 } else { 3677 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3678 } 3679 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3680 } else { 3681 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3682 } 3683 3684 /* set values */ 3685 ptr_vals = recv_buffer_vals; 3686 ptr_idxs = recv_buffer_idxs_local; 3687 for (i=0;i<n_recvs;i++) { 3688 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3689 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3690 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3691 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3692 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3693 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3694 } else { 3695 /* TODO */ 3696 } 3697 ptr_idxs += olengths_idxs[i]; 3698 ptr_vals += olengths_vals[i]; 3699 } 3700 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3701 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3702 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3703 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3704 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3705 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3706 3707 #if 0 3708 if (!restrict_comm) { /* check */ 3709 Vec lvec,rvec; 3710 PetscReal infty_error; 3711 3712 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3713 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3714 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3715 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3716 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3717 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3718 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3719 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3720 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3721 } 3722 #endif 3723 3724 /* assemble new additional is (if any) */ 3725 if (nis) { 3726 PetscInt **temp_idxs,*count_is,j,psum; 3727 3728 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3729 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3730 ptr_idxs = recv_buffer_idxs_is; 3731 psum = 0; 3732 for (i=0;i<n_recvs;i++) { 3733 for (j=0;j<nis;j++) { 3734 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3735 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3736 psum += plen; 3737 ptr_idxs += plen+1; /* shift pointer to received data */ 3738 } 3739 } 3740 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3741 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3742 for (i=1;i<nis;i++) { 3743 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3744 } 3745 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3746 ptr_idxs = recv_buffer_idxs_is; 3747 for (i=0;i<n_recvs;i++) { 3748 for (j=0;j<nis;j++) { 3749 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3750 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3751 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3752 ptr_idxs += plen+1; /* shift pointer to received data */ 3753 } 3754 } 3755 for (i=0;i<nis;i++) { 3756 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3757 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3758 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3759 } 3760 ierr = PetscFree(count_is);CHKERRQ(ierr); 3761 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3762 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3763 } 3764 /* free workspace */ 3765 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3766 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3767 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3768 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3769 if (isdense) { 3770 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3771 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3772 } else { 3773 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3774 } 3775 if (nis) { 3776 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3777 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3778 } 3779 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3780 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3781 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3782 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3783 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3784 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3785 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3786 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3787 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3788 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3789 ierr = PetscFree(onodes);CHKERRQ(ierr); 3790 if (nis) { 3791 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3792 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3793 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3794 } 3795 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3796 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3797 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3798 for (i=0;i<nis;i++) { 3799 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3800 } 3801 *mat_n = NULL; 3802 } 3803 PetscFunctionReturn(0); 3804 } 3805 3806 /* temporary hack into ksp private data structure */ 3807 #include <petsc/private/kspimpl.h> 3808 3809 #undef __FUNCT__ 3810 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3811 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3812 { 3813 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3814 PC_IS *pcis = (PC_IS*)pc->data; 3815 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3816 ISLocalToGlobalMapping coarse_islg; 3817 IS coarse_is,*isarray; 3818 PetscInt i,im_active=-1,active_procs=-1; 3819 PetscInt nis,nisdofs,nisneu; 3820 PC pc_temp; 3821 PCType coarse_pc_type; 3822 KSPType coarse_ksp_type; 3823 PetscBool multilevel_requested,multilevel_allowed; 3824 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3825 Mat t_coarse_mat_is; 3826 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3827 PetscMPIInt all_procs; 3828 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3829 PetscBool compute_vecs = PETSC_FALSE; 3830 PetscScalar *array; 3831 PetscErrorCode ierr; 3832 3833 PetscFunctionBegin; 3834 /* Assign global numbering to coarse dofs */ 3835 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3836 PetscInt ocoarse_size; 3837 compute_vecs = PETSC_TRUE; 3838 ocoarse_size = pcbddc->coarse_size; 3839 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3840 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3841 /* see if we can avoid some work */ 3842 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3843 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 3844 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 3845 PC pc; 3846 PetscBool isbddc; 3847 3848 /* temporary workaround since PCBDDC does not have a reset method so far */ 3849 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 3850 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3851 if (isbddc) { 3852 ierr = PCDestroy(&pc);CHKERRQ(ierr); 3853 } 3854 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3855 coarse_reuse = PETSC_FALSE; 3856 } else { /* we can safely reuse already computed coarse matrix */ 3857 coarse_reuse = PETSC_TRUE; 3858 } 3859 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3860 coarse_reuse = PETSC_FALSE; 3861 } 3862 /* reset any subassembling information */ 3863 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3864 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3865 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3866 coarse_reuse = PETSC_TRUE; 3867 } 3868 3869 /* count "active" (i.e. with positive local size) and "void" processes */ 3870 im_active = !!(pcis->n); 3871 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3872 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3873 void_procs = all_procs-active_procs; 3874 csin_type_simple = PETSC_TRUE; 3875 redist = PETSC_FALSE; 3876 if (pcbddc->current_level && void_procs) { 3877 csin_ml = PETSC_TRUE; 3878 ncoarse_ml = void_procs; 3879 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3880 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3881 csin_ds = PETSC_TRUE; 3882 ncoarse_ds = pcbddc->redistribute_coarse; 3883 redist = PETSC_TRUE; 3884 } else { 3885 csin_ds = PETSC_TRUE; 3886 ncoarse_ds = active_procs; 3887 redist = PETSC_TRUE; 3888 } 3889 } else { 3890 csin_ml = PETSC_FALSE; 3891 ncoarse_ml = all_procs; 3892 if (void_procs) { 3893 csin_ds = PETSC_TRUE; 3894 ncoarse_ds = void_procs; 3895 csin_type_simple = PETSC_FALSE; 3896 } else { 3897 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3898 csin_ds = PETSC_TRUE; 3899 ncoarse_ds = pcbddc->redistribute_coarse; 3900 redist = PETSC_TRUE; 3901 } else { 3902 csin_ds = PETSC_FALSE; 3903 ncoarse_ds = all_procs; 3904 } 3905 } 3906 } 3907 3908 /* 3909 test if we can go multilevel: three conditions must be satisfied: 3910 - we have not exceeded the number of levels requested 3911 - we can actually subassemble the active processes 3912 - we can find a suitable number of MPI processes where we can place the subassembled problem 3913 */ 3914 multilevel_allowed = PETSC_FALSE; 3915 multilevel_requested = PETSC_FALSE; 3916 if (pcbddc->current_level < pcbddc->max_levels) { 3917 multilevel_requested = PETSC_TRUE; 3918 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3919 multilevel_allowed = PETSC_FALSE; 3920 } else { 3921 multilevel_allowed = PETSC_TRUE; 3922 } 3923 } 3924 /* determine number of process partecipating to coarse solver */ 3925 if (multilevel_allowed) { 3926 ncoarse = ncoarse_ml; 3927 csin = csin_ml; 3928 redist = PETSC_FALSE; 3929 } else { 3930 ncoarse = ncoarse_ds; 3931 csin = csin_ds; 3932 } 3933 3934 /* creates temporary l2gmap and IS for coarse indexes */ 3935 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3936 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3937 3938 /* creates temporary MATIS object for coarse matrix */ 3939 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3940 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3941 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3942 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3943 #if 0 3944 { 3945 PetscViewer viewer; 3946 char filename[256]; 3947 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3948 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3949 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3950 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3951 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 3952 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3953 } 3954 #endif 3955 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 3956 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3957 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3958 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3959 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3960 3961 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3962 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3963 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3964 const PetscInt *idxs; 3965 ISLocalToGlobalMapping tmap; 3966 3967 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3968 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3969 /* allocate space for temporary storage */ 3970 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3971 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3972 /* allocate for IS array */ 3973 nisdofs = pcbddc->n_ISForDofsLocal; 3974 nisneu = !!pcbddc->NeumannBoundariesLocal; 3975 nis = nisdofs + nisneu; 3976 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3977 /* dofs splitting */ 3978 for (i=0;i<nisdofs;i++) { 3979 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3980 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3981 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3982 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3983 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3984 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3985 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3986 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3987 } 3988 /* neumann boundaries */ 3989 if (pcbddc->NeumannBoundariesLocal) { 3990 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3991 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3992 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3993 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3994 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3995 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3996 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3997 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3998 } 3999 /* free memory */ 4000 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4001 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4002 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4003 } else { 4004 nis = 0; 4005 nisdofs = 0; 4006 nisneu = 0; 4007 isarray = NULL; 4008 } 4009 /* destroy no longer needed map */ 4010 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4011 4012 /* restrict on coarse candidates (if needed) */ 4013 coarse_mat_is = NULL; 4014 if (csin) { 4015 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4016 if (redist) { 4017 PetscMPIInt rank; 4018 PetscInt spc,n_spc_p1,dest[1],destsize; 4019 4020 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4021 spc = active_procs/ncoarse; 4022 n_spc_p1 = active_procs%ncoarse; 4023 if (im_active) { 4024 destsize = 1; 4025 if (rank > n_spc_p1*(spc+1)-1) { 4026 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4027 } else { 4028 dest[0] = rank/(spc+1); 4029 } 4030 } else { 4031 destsize = 0; 4032 } 4033 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4034 } else if (csin_type_simple) { 4035 PetscMPIInt rank; 4036 PetscInt issize,isidx; 4037 4038 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4039 if (im_active) { 4040 issize = 1; 4041 isidx = (PetscInt)rank; 4042 } else { 4043 issize = 0; 4044 isidx = -1; 4045 } 4046 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4047 } else { /* get a suitable subassembling pattern from MATIS code */ 4048 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4049 } 4050 4051 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4052 if (!redist || ncoarse <= void_procs) { 4053 PetscInt ncoarse_cand,tissize,*nisindices; 4054 PetscInt *coarse_candidates; 4055 const PetscInt* tisindices; 4056 4057 /* get coarse candidates' ranks in pc communicator */ 4058 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4059 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4060 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4061 if (!coarse_candidates[i]) { 4062 coarse_candidates[ncoarse_cand++]=i; 4063 } 4064 } 4065 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4066 4067 4068 if (pcbddc->dbg_flag) { 4069 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4070 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4071 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4072 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4073 for (i=0;i<ncoarse_cand;i++) { 4074 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4075 } 4076 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4077 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4078 } 4079 /* shift the pattern on coarse candidates */ 4080 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4081 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4082 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4083 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4084 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4085 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4086 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4087 } 4088 if (pcbddc->dbg_flag) { 4089 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4090 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4091 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4092 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4093 } 4094 } 4095 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4096 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4097 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4098 } else { /* this is the last level, so use just receiving processes in subcomm */ 4099 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4100 } 4101 } else { 4102 if (pcbddc->dbg_flag) { 4103 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4104 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4105 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4106 } 4107 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4108 coarse_mat_is = t_coarse_mat_is; 4109 } 4110 4111 /* create local to global scatters for coarse problem */ 4112 if (compute_vecs) { 4113 PetscInt lrows; 4114 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4115 if (coarse_mat_is) { 4116 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4117 } else { 4118 lrows = 0; 4119 } 4120 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4121 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4122 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4123 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4124 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4125 } 4126 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4127 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4128 4129 /* set defaults for coarse KSP and PC */ 4130 if (multilevel_allowed) { 4131 coarse_ksp_type = KSPRICHARDSON; 4132 coarse_pc_type = PCBDDC; 4133 } else { 4134 coarse_ksp_type = KSPPREONLY; 4135 coarse_pc_type = PCREDUNDANT; 4136 } 4137 4138 /* print some info if requested */ 4139 if (pcbddc->dbg_flag) { 4140 if (!multilevel_allowed) { 4141 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4142 if (multilevel_requested) { 4143 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4144 } else if (pcbddc->max_levels) { 4145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4146 } 4147 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4148 } 4149 } 4150 4151 /* create the coarse KSP object only once with defaults */ 4152 if (coarse_mat_is) { 4153 MatReuse coarse_mat_reuse; 4154 PetscViewer dbg_viewer = NULL; 4155 if (pcbddc->dbg_flag) { 4156 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4157 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4158 } 4159 if (!pcbddc->coarse_ksp) { 4160 char prefix[256],str_level[16]; 4161 size_t len; 4162 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4163 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4164 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4165 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4166 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4167 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4168 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4169 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4170 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4171 /* prefix */ 4172 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4173 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4174 if (!pcbddc->current_level) { 4175 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4176 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4177 } else { 4178 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4179 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4180 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4181 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4182 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4183 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4184 } 4185 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4186 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4187 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4188 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4189 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4190 /* allow user customization */ 4191 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4192 } 4193 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4194 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4195 if (nisdofs) { 4196 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4197 for (i=0;i<nisdofs;i++) { 4198 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4199 } 4200 } 4201 if (nisneu) { 4202 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4203 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4204 } 4205 4206 /* get some info after set from options */ 4207 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4208 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4209 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4210 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4211 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4212 isbddc = PETSC_FALSE; 4213 } 4214 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4215 if (isredundant) { 4216 KSP inner_ksp; 4217 PC inner_pc; 4218 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4219 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4220 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4221 } 4222 4223 /* assemble coarse matrix */ 4224 if (coarse_reuse) { 4225 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4226 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4227 coarse_mat_reuse = MAT_REUSE_MATRIX; 4228 } else { 4229 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4230 } 4231 if (isbddc || isnn) { 4232 if (pcbddc->coarsening_ratio > 1) { 4233 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4234 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4235 if (pcbddc->dbg_flag) { 4236 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4237 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4238 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4239 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4240 } 4241 } 4242 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4243 } else { 4244 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4245 coarse_mat = coarse_mat_is; 4246 } 4247 } else { 4248 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4249 } 4250 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4251 4252 /* propagate symmetry info of coarse matrix */ 4253 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4254 if (pc->pmat->symmetric_set) { 4255 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4256 } 4257 if (pc->pmat->hermitian_set) { 4258 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4259 } 4260 if (pc->pmat->spd_set) { 4261 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4262 } 4263 /* set operators */ 4264 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4265 if (pcbddc->dbg_flag) { 4266 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4267 } 4268 } else { /* processes non partecipating to coarse solver (if any) */ 4269 coarse_mat = 0; 4270 } 4271 ierr = PetscFree(isarray);CHKERRQ(ierr); 4272 #if 0 4273 { 4274 PetscViewer viewer; 4275 char filename[256]; 4276 sprintf(filename,"coarse_mat.m"); 4277 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4278 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4279 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4280 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 4281 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4282 } 4283 #endif 4284 4285 if (pcbddc->coarse_ksp) { 4286 Vec crhs,csol; 4287 PetscBool ispreonly; 4288 4289 /* setup coarse ksp */ 4290 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4291 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4292 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4293 /* hack */ 4294 if (!csol) { 4295 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4296 } 4297 if (!crhs) { 4298 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4299 } 4300 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4301 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4302 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4303 KSP check_ksp; 4304 KSPType check_ksp_type; 4305 PC check_pc; 4306 Vec check_vec,coarse_vec; 4307 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4308 PetscInt its; 4309 PetscBool compute_eigs; 4310 PetscReal *eigs_r,*eigs_c; 4311 PetscInt neigs; 4312 const char *prefix; 4313 4314 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4315 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4316 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4317 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4318 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4319 if (ispreonly) { 4320 check_ksp_type = KSPPREONLY; 4321 compute_eigs = PETSC_FALSE; 4322 } else { 4323 check_ksp_type = KSPGMRES; 4324 compute_eigs = PETSC_TRUE; 4325 } 4326 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4327 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4328 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4329 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4330 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4331 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4332 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4333 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4334 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4335 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4336 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4337 /* create random vec */ 4338 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4339 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4340 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4341 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4342 /* solve coarse problem */ 4343 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4344 /* set eigenvalue estimation if preonly has not been requested */ 4345 if (compute_eigs) { 4346 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4347 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4348 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4349 lambda_max = eigs_r[neigs-1]; 4350 lambda_min = eigs_r[0]; 4351 if (pcbddc->use_coarse_estimates) { 4352 if (lambda_max>lambda_min) { 4353 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4354 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4355 } 4356 } 4357 } 4358 4359 /* check coarse problem residual error */ 4360 if (pcbddc->dbg_flag) { 4361 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4362 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4363 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4364 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4365 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4366 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4367 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4368 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4369 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4370 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4371 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4372 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4373 if (compute_eigs) { 4374 PetscReal lambda_max_s,lambda_min_s; 4375 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4376 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4377 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4378 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4379 for (i=0;i<neigs;i++) { 4380 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4381 } 4382 } 4383 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4384 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4385 } 4386 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4387 if (compute_eigs) { 4388 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4389 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4390 } 4391 } 4392 } 4393 /* print additional info */ 4394 if (pcbddc->dbg_flag) { 4395 /* waits until all processes reaches this point */ 4396 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4397 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4398 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4399 } 4400 4401 /* free memory */ 4402 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4403 PetscFunctionReturn(0); 4404 } 4405 4406 #undef __FUNCT__ 4407 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4408 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4409 { 4410 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4411 PC_IS* pcis = (PC_IS*)pc->data; 4412 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4413 IS subset,subset_mult,subset_n; 4414 PetscInt local_size,coarse_size=0; 4415 PetscInt *local_primal_indices=NULL; 4416 const PetscInt *t_local_primal_indices; 4417 PetscErrorCode ierr; 4418 4419 PetscFunctionBegin; 4420 /* Compute global number of coarse dofs */ 4421 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4422 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4423 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4424 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4425 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4426 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4427 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4428 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4429 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4430 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 4431 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4432 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4433 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4434 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4435 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4436 4437 /* check numbering */ 4438 if (pcbddc->dbg_flag) { 4439 PetscScalar coarsesum,*array; 4440 PetscInt i; 4441 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4442 4443 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4444 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4445 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4446 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4447 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4448 for (i=0;i<pcbddc->local_primal_size;i++) { 4449 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4450 } 4451 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4452 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4453 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4454 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4455 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4456 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4457 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4458 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4459 for (i=0;i<pcis->n;i++) { 4460 if (array[i] == 1.0) { 4461 set_error = PETSC_TRUE; 4462 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4463 } 4464 } 4465 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4466 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4467 for (i=0;i<pcis->n;i++) { 4468 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4469 } 4470 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4471 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4472 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4473 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4474 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4475 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4476 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4477 PetscInt *gidxs; 4478 4479 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4480 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4481 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4482 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4483 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4484 for (i=0;i<pcbddc->local_primal_size;i++) { 4485 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4486 } 4487 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4488 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4489 } 4490 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4491 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4492 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4493 } 4494 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4495 /* get back data */ 4496 *coarse_size_n = coarse_size; 4497 *local_primal_indices_n = local_primal_indices; 4498 PetscFunctionReturn(0); 4499 } 4500 4501 #undef __FUNCT__ 4502 #define __FUNCT__ "PCBDDCGlobalToLocal" 4503 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4504 { 4505 IS localis_t; 4506 PetscInt i,lsize,*idxs,n; 4507 PetscScalar *vals; 4508 PetscErrorCode ierr; 4509 4510 PetscFunctionBegin; 4511 /* get indices in local ordering exploiting local to global map */ 4512 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4513 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4514 for (i=0;i<lsize;i++) vals[i] = 1.0; 4515 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4516 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4517 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4518 if (idxs) { /* multilevel guard */ 4519 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4520 } 4521 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4522 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4523 ierr = PetscFree(vals);CHKERRQ(ierr); 4524 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4525 /* now compute set in local ordering */ 4526 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4527 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4528 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4529 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4530 for (i=0,lsize=0;i<n;i++) { 4531 if (PetscRealPart(vals[i]) > 0.5) { 4532 lsize++; 4533 } 4534 } 4535 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4536 for (i=0,lsize=0;i<n;i++) { 4537 if (PetscRealPart(vals[i]) > 0.5) { 4538 idxs[lsize++] = i; 4539 } 4540 } 4541 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4542 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4543 *localis = localis_t; 4544 PetscFunctionReturn(0); 4545 } 4546 4547 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4548 #undef __FUNCT__ 4549 #define __FUNCT__ "PCBDDCMatMult_Private" 4550 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4551 { 4552 PCBDDCChange_ctx change_ctx; 4553 PetscErrorCode ierr; 4554 4555 PetscFunctionBegin; 4556 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4557 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4558 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4559 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4560 PetscFunctionReturn(0); 4561 } 4562 4563 #undef __FUNCT__ 4564 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4565 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4566 { 4567 PCBDDCChange_ctx change_ctx; 4568 PetscErrorCode ierr; 4569 4570 PetscFunctionBegin; 4571 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4572 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4573 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4574 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4575 PetscFunctionReturn(0); 4576 } 4577 4578 #undef __FUNCT__ 4579 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4580 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4581 { 4582 PC_IS *pcis=(PC_IS*)pc->data; 4583 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4584 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4585 Mat S_j; 4586 PetscInt *used_xadj,*used_adjncy; 4587 PetscBool free_used_adj; 4588 PetscErrorCode ierr; 4589 4590 PetscFunctionBegin; 4591 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4592 free_used_adj = PETSC_FALSE; 4593 if (pcbddc->sub_schurs_layers == -1) { 4594 used_xadj = NULL; 4595 used_adjncy = NULL; 4596 } else { 4597 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4598 used_xadj = pcbddc->mat_graph->xadj; 4599 used_adjncy = pcbddc->mat_graph->adjncy; 4600 } else if (pcbddc->computed_rowadj) { 4601 used_xadj = pcbddc->mat_graph->xadj; 4602 used_adjncy = pcbddc->mat_graph->adjncy; 4603 } else { 4604 PetscBool flg_row=PETSC_FALSE; 4605 const PetscInt *xadj,*adjncy; 4606 PetscInt nvtxs; 4607 4608 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4609 if (flg_row) { 4610 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4611 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4612 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4613 free_used_adj = PETSC_TRUE; 4614 } else { 4615 pcbddc->sub_schurs_layers = -1; 4616 used_xadj = NULL; 4617 used_adjncy = NULL; 4618 } 4619 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4620 } 4621 } 4622 4623 /* setup sub_schurs data */ 4624 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4625 if (!sub_schurs->use_mumps) { 4626 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4627 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4628 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4629 } else { 4630 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4631 PetscBool isseqaij; 4632 if (!pcbddc->use_vertices && reuse_solvers) { 4633 PetscInt n_vertices; 4634 4635 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4636 reuse_solvers = (PetscBool)!n_vertices; 4637 } 4638 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4639 if (!isseqaij) { 4640 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4641 if (matis->A == pcbddc->local_mat) { 4642 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4643 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4644 } else { 4645 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4646 } 4647 } 4648 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4649 } 4650 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4651 4652 /* free adjacency */ 4653 if (free_used_adj) { 4654 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4655 } 4656 PetscFunctionReturn(0); 4657 } 4658 4659 #undef __FUNCT__ 4660 #define __FUNCT__ "PCBDDCInitSubSchurs" 4661 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4662 { 4663 PC_IS *pcis=(PC_IS*)pc->data; 4664 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4665 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4666 PCBDDCGraph graph; 4667 PetscErrorCode ierr; 4668 4669 PetscFunctionBegin; 4670 /* attach interface graph for determining subsets */ 4671 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4672 IS verticesIS,verticescomm; 4673 PetscInt vsize,*idxs; 4674 4675 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4676 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4677 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4678 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4679 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4680 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4681 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4682 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4683 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4684 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4685 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4686 /* 4687 if (pcbddc->dbg_flag) { 4688 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4689 } 4690 */ 4691 } else { 4692 graph = pcbddc->mat_graph; 4693 } 4694 4695 /* sub_schurs init */ 4696 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4697 4698 /* free graph struct */ 4699 if (pcbddc->sub_schurs_rebuild) { 4700 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4701 } 4702 PetscFunctionReturn(0); 4703 } 4704