1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 29 30 if (pcbddc->dbg_flag) { 31 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 32 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 33 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 35 } 36 37 if (pcbddc->dbg_flag) { 38 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 39 } 40 41 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 42 43 /* max size of subsets */ 44 mss = 0; 45 for (i=0;i<sub_schurs->n_subs;i++) { 46 PetscInt subset_size; 47 48 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 49 mss = PetscMax(mss,subset_size); 50 } 51 52 /* min/max and threshold */ 53 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 54 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 55 nmax = PetscMax(nmin,nmax); 56 allocated_S_St = PETSC_FALSE; 57 if (nmin) { 58 allocated_S_St = PETSC_TRUE; 59 } 60 61 /* allocate lapack workspace */ 62 cum = cum2 = 0; 63 maxneigs = 0; 64 for (i=0;i<sub_schurs->n_subs;i++) { 65 PetscInt n,subset_size; 66 67 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 68 n = PetscMin(subset_size,nmax); 69 cum += subset_size; 70 cum2 += subset_size*n; 71 maxneigs = PetscMax(maxneigs,n); 72 } 73 if (mss) { 74 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 75 PetscBLASInt B_itype = 1; 76 PetscBLASInt B_N = mss; 77 PetscReal zero = 0.0; 78 PetscReal eps = 0.0; /* dlamch? */ 79 80 B_lwork = -1; 81 S = NULL; 82 St = NULL; 83 eigs = NULL; 84 eigv = NULL; 85 B_iwork = NULL; 86 B_ifail = NULL; 87 #if defined(PETSC_USE_COMPLEX) 88 rwork = NULL; 89 #endif 90 thresh = 1.0; 91 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 92 #if defined(PETSC_USE_COMPLEX) 93 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 94 #else 95 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 96 #endif 97 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 98 ierr = PetscFPTrapPop();CHKERRQ(ierr); 99 } else { 100 /* TODO */ 101 } 102 } else { 103 lwork = 0; 104 } 105 106 nv = 0; 107 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 108 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 109 } 110 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 111 if (allocated_S_St) { 112 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 113 } 114 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 115 #if defined(PETSC_USE_COMPLEX) 116 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 117 #endif 118 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 119 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 121 nv+cum,&pcbddc->adaptive_constraints_idxs, 122 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 123 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 124 125 maxneigs = 0; 126 cum = cum2 = cumarray = 0; 127 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 128 pcbddc->adaptive_constraints_data_ptr[0] = 0; 129 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 130 const PetscInt *idxs; 131 132 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 133 for (cum=0;cum<nv;cum++) { 134 pcbddc->adaptive_constraints_n[cum] = 1; 135 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 136 pcbddc->adaptive_constraints_data[cum] = 1.0; 137 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 138 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 139 } 140 cum2 = cum; 141 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 142 } 143 144 if (mss) { /* multilevel */ 145 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 146 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 147 } 148 149 for (i=0;i<sub_schurs->n_subs;i++) { 150 151 const PetscInt *idxs; 152 PetscReal infty = PETSC_MAX_REAL; 153 PetscInt j,subset_size,eigs_start = 0; 154 PetscBLASInt B_N; 155 PetscBool same_data = PETSC_FALSE; 156 157 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 158 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 159 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 160 if (sub_schurs->is_hermitian) { 161 PetscInt j,k; 162 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 163 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 } 166 for (j=0;j<subset_size;j++) { 167 for (k=j;k<subset_size;k++) { 168 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 169 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 170 } 171 } 172 } else { 173 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 174 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 } 176 } else { 177 S = Sarray + cumarray; 178 St = Starray + cumarray; 179 } 180 181 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 182 /* see if we can save some work */ 183 if (sub_schurs->n_subs == 1) { 184 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 185 } 186 187 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 188 B_neigs = 0; 189 } else { 190 /* Threshold: this is an heuristic for edges */ 191 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 192 193 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 194 PetscBLASInt B_itype = 1; 195 PetscBLASInt B_IL, B_IU; 196 PetscReal eps = -1.0; /* dlamch? */ 197 PetscInt nmin_s; 198 199 /* ask for eigenvalues larger than thresh */ 200 if (pcbddc->dbg_flag) { 201 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 202 } 203 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 204 #if defined(PETSC_USE_COMPLEX) 205 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 206 #else 207 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 208 #endif 209 ierr = PetscFPTrapPop();CHKERRQ(ierr); 210 if (B_ierr) { 211 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 212 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 213 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 214 } 215 216 if (B_neigs > nmax) { 217 if (pcbddc->dbg_flag) { 218 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 219 } 220 eigs_start = B_neigs -nmax; 221 B_neigs = nmax; 222 } 223 224 nmin_s = PetscMin(nmin,B_N); 225 if (B_neigs < nmin_s) { 226 PetscBLASInt B_neigs2; 227 228 B_IU = B_N - B_neigs; 229 B_IL = B_N - nmin_s + 1; 230 if (pcbddc->dbg_flag) { 231 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 232 } 233 if (sub_schurs->is_hermitian) { 234 PetscInt j; 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 for (j=0;j<subset_size;j++) { 239 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 240 } 241 } else { 242 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 244 } 245 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 246 #if defined(PETSC_USE_COMPLEX) 247 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 248 #else 249 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 250 #endif 251 ierr = PetscFPTrapPop();CHKERRQ(ierr); 252 B_neigs += B_neigs2; 253 } 254 if (B_ierr) { 255 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 256 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 257 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 if (pcbddc->dbg_flag) { 260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 261 for (j=0;j<B_neigs;j++) { 262 if (eigs[j] == 0.0) { 263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 264 } else { 265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 266 } 267 } 268 } 269 } else { 270 /* TODO */ 271 } 272 } 273 maxneigs = PetscMax(B_neigs,maxneigs); 274 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 275 if (B_neigs) { 276 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 277 278 if (pcbddc->dbg_flag > 1) { 279 PetscInt ii; 280 for (ii=0;ii<B_neigs;ii++) { 281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 282 for (j=0;j<B_N;j++) { 283 #if defined(PETSC_USE_COMPLEX) 284 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 287 #else 288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 289 #endif 290 } 291 } 292 } 293 #if 0 294 for (j=0;j<B_neigs;j++) { 295 PetscBLASInt Blas_N,Blas_one = 1.0; 296 PetscScalar norm; 297 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 298 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 299 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 300 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 301 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 302 } else { 303 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 304 } 305 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 306 } 307 #endif 308 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 309 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 310 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 311 cum++; 312 } 313 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 314 /* shift for next computation */ 315 cumarray += subset_size*subset_size; 316 } 317 if (pcbddc->dbg_flag) { 318 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 319 } 320 321 if (mss) { 322 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 323 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 324 /* destroy matrices (junk) */ 325 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 326 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 327 } 328 if (allocated_S_St) { 329 ierr = PetscFree2(S,St);CHKERRQ(ierr); 330 } 331 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 332 #if defined(PETSC_USE_COMPLEX) 333 ierr = PetscFree(rwork);CHKERRQ(ierr); 334 #endif 335 if (pcbddc->dbg_flag) { 336 PetscInt maxneigs_r; 337 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 338 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 339 } 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "PCBDDCSetUpSolvers" 345 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 346 { 347 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 348 PetscScalar *coarse_submat_vals; 349 PetscErrorCode ierr; 350 351 PetscFunctionBegin; 352 /* Setup local scatters R_to_B and (optionally) R_to_D */ 353 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 354 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 355 356 /* Setup local neumann solver ksp_R */ 357 /* PCBDDCSetUpLocalScatters should be called first! */ 358 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 359 360 /* Change global null space passed in by the user if change of basis has been requested */ 361 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 362 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 363 } 364 365 /* 366 Setup local correction and local part of coarse basis. 367 Gives back the dense local part of the coarse matrix in column major ordering 368 */ 369 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 370 371 /* Compute total number of coarse nodes and setup coarse solver */ 372 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 373 374 /* free */ 375 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 376 PetscFunctionReturn(0); 377 } 378 379 #undef __FUNCT__ 380 #define __FUNCT__ "PCBDDCResetCustomization" 381 PetscErrorCode PCBDDCResetCustomization(PC pc) 382 { 383 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 384 PetscErrorCode ierr; 385 386 PetscFunctionBegin; 387 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 390 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 392 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 393 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 394 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 395 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 396 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 397 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 #undef __FUNCT__ 402 #define __FUNCT__ "PCBDDCResetTopography" 403 PetscErrorCode PCBDDCResetTopography(PC pc) 404 { 405 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 406 PetscErrorCode ierr; 407 408 PetscFunctionBegin; 409 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 411 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 412 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 413 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "PCBDDCResetSolvers" 419 PetscErrorCode PCBDDCResetSolvers(PC pc) 420 { 421 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 422 PetscScalar *array; 423 PetscErrorCode ierr; 424 425 PetscFunctionBegin; 426 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 427 if (pcbddc->coarse_phi_B) { 428 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 429 ierr = PetscFree(array);CHKERRQ(ierr); 430 } 431 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 432 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 434 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 436 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 437 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 438 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 440 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 441 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 442 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 443 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 444 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 445 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 446 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 447 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 448 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 449 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 450 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 451 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 452 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 459 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 460 { 461 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 462 PC_IS *pcis = (PC_IS*)pc->data; 463 VecType impVecType; 464 PetscInt n_constraints,n_R,old_size; 465 PetscErrorCode ierr; 466 467 PetscFunctionBegin; 468 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 469 /* get sizes */ 470 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 471 n_R = pcis->n-pcbddc->n_vertices; 472 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 473 /* local work vectors (try to avoid unneeded work)*/ 474 /* R nodes */ 475 old_size = -1; 476 if (pcbddc->vec1_R) { 477 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 478 } 479 if (n_R != old_size) { 480 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 481 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 482 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 483 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 484 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 485 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 486 } 487 /* local primal dofs */ 488 old_size = -1; 489 if (pcbddc->vec1_P) { 490 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 491 } 492 if (pcbddc->local_primal_size != old_size) { 493 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 494 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 495 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 496 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 497 } 498 /* local explicit constraints */ 499 old_size = -1; 500 if (pcbddc->vec1_C) { 501 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 502 } 503 if (n_constraints && n_constraints != old_size) { 504 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 505 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 506 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 507 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 508 } 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "PCBDDCSetUpCorrection" 514 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 515 { 516 PetscErrorCode ierr; 517 /* pointers to pcis and pcbddc */ 518 PC_IS* pcis = (PC_IS*)pc->data; 519 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 520 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 521 /* submatrices of local problem */ 522 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 523 /* submatrices of local coarse problem */ 524 Mat S_VV,S_CV,S_VC,S_CC; 525 /* working matrices */ 526 Mat C_CR; 527 /* additional working stuff */ 528 PC pc_R; 529 Mat F; 530 PetscBool isLU,isCHOL,isILU; 531 532 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 533 PetscScalar *work; 534 PetscInt *idx_V_B; 535 PetscInt n,n_vertices,n_constraints; 536 PetscInt i,n_R,n_D,n_B; 537 PetscBool unsymmetric_check; 538 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 539 MatType impMatType; 540 /* some shortcuts to scalars */ 541 PetscScalar one=1.0,m_one=-1.0; 542 543 PetscFunctionBegin; 544 n_vertices = pcbddc->n_vertices; 545 n_constraints = pcbddc->local_primal_size-n_vertices; 546 /* Set Non-overlapping dimensions */ 547 n_B = pcis->n_B; 548 n_D = pcis->n - n_B; 549 n_R = pcis->n - n_vertices; 550 551 /* Set types for local objects needed by BDDC precondtioner */ 552 impMatType = MATSEQDENSE; 553 554 /* vertices in boundary numbering */ 555 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 556 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 557 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 558 559 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 560 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 564 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 566 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 568 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 569 570 unsymmetric_check = PETSC_FALSE; 571 /* allocate workspace */ 572 n = 0; 573 if (n_constraints) { 574 n += n_R*n_constraints; 575 } 576 if (n_vertices) { 577 n = PetscMax(2*n_R*n_vertices,n); 578 n = PetscMax((n_R+n_B)*n_vertices,n); 579 } 580 if (!pcbddc->symmetric_primal) { 581 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 582 unsymmetric_check = PETSC_TRUE; 583 } 584 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 585 586 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 587 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 590 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 591 if (isLU || isILU || isCHOL) { 592 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 593 } else if (sub_schurs->reuse_mumps) { 594 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 595 MatFactorType type; 596 597 F = reuse_mumps->F; 598 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 599 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 600 } else { 601 F = NULL; 602 } 603 604 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 605 if (n_constraints) { 606 Mat M1,M2,M3; 607 Mat auxmat; 608 IS is_aux; 609 PetscScalar *array,*array2; 610 611 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 612 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 613 614 /* Extract constraints on R nodes: C_{CR} */ 615 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 616 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 617 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 618 619 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 620 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 621 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 622 for (i=0;i<n_constraints;i++) { 623 const PetscScalar *row_cmat_values; 624 const PetscInt *row_cmat_indices; 625 PetscInt size_of_constraint,j; 626 627 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 628 for (j=0;j<size_of_constraint;j++) { 629 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 630 } 631 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 632 } 633 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 634 if (F) { 635 Mat B; 636 637 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 638 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 639 ierr = MatDestroy(&B);CHKERRQ(ierr); 640 } else { 641 PetscScalar *marr; 642 643 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 644 for (i=0;i<n_constraints;i++) { 645 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 646 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 647 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 648 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 649 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 650 } 651 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 652 } 653 if (!pcbddc->switch_static) { 654 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 655 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 656 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 660 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 661 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 662 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 663 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 664 } 665 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 666 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 667 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 668 } else { 669 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 670 pcbddc->local_auxmat2 = local_auxmat2_R; 671 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 672 } 673 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 674 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 675 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 676 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 677 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 678 if (isCHOL) { 679 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 680 } else { 681 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 682 } 683 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 684 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 685 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 686 ierr = MatDestroy(&M2);CHKERRQ(ierr); 687 ierr = MatDestroy(&M3);CHKERRQ(ierr); 688 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 689 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 690 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 691 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 692 ierr = MatDestroy(&M1);CHKERRQ(ierr); 693 } 694 /* Get submatrices from subdomain matrix */ 695 if (n_vertices) { 696 IS is_aux; 697 698 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 699 IS tis; 700 701 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 702 ierr = ISSort(tis);CHKERRQ(ierr); 703 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 704 ierr = ISDestroy(&tis);CHKERRQ(ierr); 705 } else { 706 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 707 } 708 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 709 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 710 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 711 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 712 } 713 714 /* Matrix of coarse basis functions (local) */ 715 if (pcbddc->coarse_phi_B) { 716 PetscInt on_B,on_primal,on_D=n_D; 717 if (pcbddc->coarse_phi_D) { 718 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 719 } 720 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 721 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 722 PetscScalar *marray; 723 724 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 725 ierr = PetscFree(marray);CHKERRQ(ierr); 726 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 727 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 728 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 729 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 730 } 731 } 732 733 if (!pcbddc->coarse_phi_B) { 734 PetscScalar *marray; 735 736 n = n_B*pcbddc->local_primal_size; 737 if (pcbddc->switch_static || pcbddc->dbg_flag) { 738 n += n_D*pcbddc->local_primal_size; 739 } 740 if (!pcbddc->symmetric_primal) { 741 n *= 2; 742 } 743 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 745 n = n_B*pcbddc->local_primal_size; 746 if (pcbddc->switch_static || pcbddc->dbg_flag) { 747 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 748 n += n_D*pcbddc->local_primal_size; 749 } 750 if (!pcbddc->symmetric_primal) { 751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 752 if (pcbddc->switch_static || pcbddc->dbg_flag) { 753 n = n_B*pcbddc->local_primal_size; 754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 755 } 756 } else { 757 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 758 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 760 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 761 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 762 } 763 } 764 } 765 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 766 /* vertices */ 767 if (n_vertices) { 768 769 ierr = MatConvert(A_VV,impMatType,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 770 771 if (n_R) { 772 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 773 PetscBLASInt B_N,B_one = 1; 774 PetscScalar *x,*y; 775 PetscBool isseqaij; 776 777 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 778 ierr = MatConvert(A_RV,impMatType,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 780 if (F) { /* TODO could be optimized for symmetric problems */ 781 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 782 } else { 783 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 784 for (i=0;i<n_vertices;i++) { 785 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 786 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 787 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 788 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 789 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 790 } 791 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 792 } 793 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 794 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 795 if (n_constraints) { 796 Mat B; 797 798 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 799 for (i=0;i<n_vertices;i++) { 800 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 801 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 802 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 803 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 804 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 805 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 806 } 807 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 808 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 809 ierr = MatDestroy(&B);CHKERRQ(ierr); 810 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 811 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 812 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 813 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 814 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 815 ierr = MatDestroy(&B);CHKERRQ(ierr); 816 } 817 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 818 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 819 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 820 } 821 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 822 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 823 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 824 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 825 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 826 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 827 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 828 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 829 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 830 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 831 } else { 832 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 833 } 834 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 835 /* coarse basis functions */ 836 for (i=0;i<n_vertices;i++) { 837 PetscScalar *y; 838 839 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 840 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 841 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 842 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 y[n_B*i+idx_V_B[i]] = 1.0; 845 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 846 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 847 848 if (pcbddc->switch_static || pcbddc->dbg_flag) { 849 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 850 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 851 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 852 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 853 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 854 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 855 } 856 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 857 } 858 /* if n_R == 0 the object is not destroyed */ 859 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 860 } 861 862 if (n_constraints) { 863 Mat B; 864 865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 866 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 867 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 868 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 869 if (n_vertices) { 870 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 871 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 872 } else { 873 Mat S_VCt; 874 875 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 876 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 877 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 878 } 879 } 880 ierr = MatDestroy(&B);CHKERRQ(ierr); 881 /* coarse basis functions */ 882 for (i=0;i<n_constraints;i++) { 883 PetscScalar *y; 884 885 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 886 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 887 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 888 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 891 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 892 if (pcbddc->switch_static || pcbddc->dbg_flag) { 893 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 894 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 895 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 898 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 899 } 900 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 901 } 902 } 903 if (n_constraints) { 904 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 905 } 906 907 /* compute other basis functions for non-symmetric problems */ 908 if (!pcbddc->symmetric_primal) { 909 910 if (n_constraints) { 911 Mat S_CCT,B_C; 912 913 /* this is a lazy thing */ 914 ierr = MatConvert(C_CR,impMatType,MAT_INPLACE_MATRIX,&C_CR);CHKERRQ(ierr); 915 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 916 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 917 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 918 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 919 if (n_vertices) { 920 Mat B_V,S_VCT; 921 922 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 923 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 924 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 925 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 926 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 927 } 928 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 929 } else { /* if there are no constraints, reset work */ 930 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 931 } 932 if (n_vertices && n_R) { 933 Mat A_VRT; 934 PetscScalar *marray; 935 PetscBLASInt B_N,B_one = 1; 936 937 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 938 ierr = MatConvert(A_VRT,impMatType,MAT_INPLACE_MATRIX,&A_VRT);CHKERRQ(ierr); 939 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 940 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 941 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 942 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 943 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 944 } 945 946 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 947 for (i=0;i<pcbddc->local_primal_size;i++) { 948 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 949 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 950 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 951 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 952 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 953 } 954 } else { 955 for (i=0;i<pcbddc->local_primal_size;i++) { 956 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 957 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 958 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 959 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 960 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 961 } 962 } 963 /* coarse basis functions */ 964 for (i=0;i<pcbddc->local_primal_size;i++) { 965 PetscScalar *y; 966 967 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 968 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 969 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 970 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 971 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 972 if (i<n_vertices) { 973 y[n_B*i+idx_V_B[i]] = 1.0; 974 } 975 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 976 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 977 978 if (pcbddc->switch_static || pcbddc->dbg_flag) { 979 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 980 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 981 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 982 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 984 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 985 } 986 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 987 } 988 } 989 /* free memory */ 990 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 991 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 992 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 993 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 994 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 995 ierr = PetscFree(work);CHKERRQ(ierr); 996 if (n_vertices) { 997 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 998 } 999 if (n_constraints) { 1000 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1001 } 1002 /* Checking coarse_sub_mat and coarse basis functios */ 1003 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1004 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1005 if (pcbddc->dbg_flag) { 1006 Mat coarse_sub_mat; 1007 Mat AUXMAT,TM1,TM2,TM3,TM4; 1008 Mat coarse_phi_D,coarse_phi_B; 1009 Mat coarse_psi_D,coarse_psi_B; 1010 Mat A_II,A_BB,A_IB,A_BI; 1011 Mat C_B,CPHI; 1012 IS is_dummy; 1013 Vec mones; 1014 MatType checkmattype=MATSEQAIJ; 1015 PetscReal real_value; 1016 1017 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1018 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1019 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1020 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1021 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1022 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1023 if (unsymmetric_check) { 1024 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1025 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1026 } 1027 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1028 1029 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1030 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1032 if (unsymmetric_check) { 1033 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1034 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1035 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1036 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1037 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1038 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1039 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1040 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1041 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1042 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1043 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1044 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1045 } else { 1046 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1047 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1048 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1049 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1050 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1051 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1053 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1054 } 1055 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1056 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1057 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1058 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 1059 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1060 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1061 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1062 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1063 1064 /* check constraints */ 1065 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1066 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1067 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1068 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1069 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1070 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1071 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1073 if (unsymmetric_check) { 1074 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1075 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1076 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1077 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1079 } 1080 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1081 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1082 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1083 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1084 1085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1086 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1087 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1088 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1089 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1090 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1091 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1092 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1093 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1094 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1095 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1096 if (unsymmetric_check) { 1097 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1098 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1099 } 1100 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1101 } 1102 /* get back data */ 1103 *coarse_submat_vals_n = coarse_submat_vals; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1109 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1110 { 1111 Mat *work_mat; 1112 IS isrow_s,iscol_s; 1113 PetscBool rsorted,csorted; 1114 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1115 PetscErrorCode ierr; 1116 1117 PetscFunctionBegin; 1118 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1119 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1120 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1121 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1122 1123 if (!rsorted) { 1124 const PetscInt *idxs; 1125 PetscInt *idxs_sorted,i; 1126 1127 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1128 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1129 for (i=0;i<rsize;i++) { 1130 idxs_perm_r[i] = i; 1131 } 1132 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1133 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1134 for (i=0;i<rsize;i++) { 1135 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1136 } 1137 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1138 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1139 } else { 1140 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1141 isrow_s = isrow; 1142 } 1143 1144 if (!csorted) { 1145 if (isrow == iscol) { 1146 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1147 iscol_s = isrow_s; 1148 } else { 1149 const PetscInt *idxs; 1150 PetscInt *idxs_sorted,i; 1151 1152 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1153 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1154 for (i=0;i<csize;i++) { 1155 idxs_perm_c[i] = i; 1156 } 1157 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1158 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1159 for (i=0;i<csize;i++) { 1160 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1161 } 1162 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1163 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1164 } 1165 } else { 1166 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1167 iscol_s = iscol; 1168 } 1169 1170 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1171 1172 if (!rsorted || !csorted) { 1173 Mat new_mat; 1174 IS is_perm_r,is_perm_c; 1175 1176 if (!rsorted) { 1177 PetscInt *idxs_r,i; 1178 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1179 for (i=0;i<rsize;i++) { 1180 idxs_r[idxs_perm_r[i]] = i; 1181 } 1182 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1183 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1184 } else { 1185 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1186 } 1187 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1188 1189 if (!csorted) { 1190 if (isrow_s == iscol_s) { 1191 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1192 is_perm_c = is_perm_r; 1193 } else { 1194 PetscInt *idxs_c,i; 1195 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1196 for (i=0;i<csize;i++) { 1197 idxs_c[idxs_perm_c[i]] = i; 1198 } 1199 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1200 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1201 } 1202 } else { 1203 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1204 } 1205 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1206 1207 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1208 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1209 work_mat[0] = new_mat; 1210 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1211 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1212 } 1213 1214 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1215 *B = work_mat[0]; 1216 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1217 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1218 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1224 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1225 { 1226 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1227 PC_IS* pcis = (PC_IS*)pc->data; 1228 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1229 Mat new_mat; 1230 IS is_local,is_global; 1231 PetscInt local_size; 1232 PetscBool isseqaij; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1237 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1238 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1239 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1240 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1241 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1242 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1243 1244 /* check */ 1245 if (pcbddc->dbg_flag) { 1246 Vec x,x_change; 1247 PetscReal error; 1248 1249 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1250 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1251 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1252 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1253 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1254 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1255 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1256 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1257 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1258 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1259 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1260 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1261 ierr = VecDestroy(&x);CHKERRQ(ierr); 1262 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1263 } 1264 1265 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1266 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1267 if (isseqaij) { 1268 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1269 } else { 1270 Mat work_mat; 1271 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1272 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1273 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1274 } 1275 if (matis->A->symmetric_set) { 1276 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1277 #if !defined(PETSC_USE_COMPLEX) 1278 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1279 #endif 1280 } 1281 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1287 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1288 { 1289 PC_IS* pcis = (PC_IS*)(pc->data); 1290 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1291 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1292 PetscInt *idx_R_local=NULL; 1293 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1294 PetscInt vbs,bs; 1295 PetscBT bitmask=NULL; 1296 PetscErrorCode ierr; 1297 1298 PetscFunctionBegin; 1299 /* 1300 No need to setup local scatters if 1301 - primal space is unchanged 1302 AND 1303 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1304 AND 1305 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1306 */ 1307 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1308 PetscFunctionReturn(0); 1309 } 1310 /* destroy old objects */ 1311 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1312 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1313 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1314 /* Set Non-overlapping dimensions */ 1315 n_B = pcis->n_B; 1316 n_D = pcis->n - n_B; 1317 n_vertices = pcbddc->n_vertices; 1318 1319 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1320 1321 /* create auxiliary bitmask and allocate workspace */ 1322 if (!sub_schurs->reuse_mumps) { 1323 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1324 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1325 for (i=0;i<n_vertices;i++) { 1326 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1327 } 1328 1329 for (i=0, n_R=0; i<pcis->n; i++) { 1330 if (!PetscBTLookup(bitmask,i)) { 1331 idx_R_local[n_R++] = i; 1332 } 1333 } 1334 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1335 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1336 1337 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1338 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1339 } 1340 1341 /* Block code */ 1342 vbs = 1; 1343 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1344 if (bs>1 && !(n_vertices%bs)) { 1345 PetscBool is_blocked = PETSC_TRUE; 1346 PetscInt *vary; 1347 if (!sub_schurs->reuse_mumps) { 1348 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1349 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1350 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1351 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1352 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1353 for (i=0; i<pcis->n/bs; i++) { 1354 if (vary[i]!=0 && vary[i]!=bs) { 1355 is_blocked = PETSC_FALSE; 1356 break; 1357 } 1358 } 1359 ierr = PetscFree(vary);CHKERRQ(ierr); 1360 } else { 1361 /* Verify directly the R set */ 1362 for (i=0; i<n_R/bs; i++) { 1363 PetscInt j,node=idx_R_local[bs*i]; 1364 for (j=1; j<bs; j++) { 1365 if (node != idx_R_local[bs*i+j]-j) { 1366 is_blocked = PETSC_FALSE; 1367 break; 1368 } 1369 } 1370 } 1371 } 1372 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1373 vbs = bs; 1374 for (i=0;i<n_R/vbs;i++) { 1375 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1376 } 1377 } 1378 } 1379 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1380 if (sub_schurs->reuse_mumps) { 1381 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1382 1383 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1384 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1385 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1386 reuse_mumps->is_R = pcbddc->is_R_local; 1387 } else { 1388 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1389 } 1390 1391 /* print some info if requested */ 1392 if (pcbddc->dbg_flag) { 1393 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1394 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1395 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1396 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1397 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1398 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1399 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1400 } 1401 1402 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1403 if (!sub_schurs->reuse_mumps) { 1404 IS is_aux1,is_aux2; 1405 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1406 1407 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1408 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1409 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1410 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1411 for (i=0; i<n_D; i++) { 1412 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1413 } 1414 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1415 for (i=0, j=0; i<n_R; i++) { 1416 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1417 aux_array1[j++] = i; 1418 } 1419 } 1420 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1421 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1422 for (i=0, j=0; i<n_B; i++) { 1423 if (!PetscBTLookup(bitmask,is_indices[i])) { 1424 aux_array2[j++] = i; 1425 } 1426 } 1427 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1428 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1429 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1430 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1431 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1432 1433 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1434 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1435 for (i=0, j=0; i<n_R; i++) { 1436 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1437 aux_array1[j++] = i; 1438 } 1439 } 1440 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1441 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1442 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1443 } 1444 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1445 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1446 } else { 1447 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1448 IS tis; 1449 PetscInt schur_size; 1450 1451 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1452 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1453 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1454 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1455 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1456 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1457 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1458 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1459 } 1460 } 1461 PetscFunctionReturn(0); 1462 } 1463 1464 1465 #undef __FUNCT__ 1466 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1467 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1468 { 1469 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1470 PC_IS *pcis = (PC_IS*)pc->data; 1471 PC pc_temp; 1472 Mat A_RR; 1473 MatReuse reuse; 1474 PetscScalar m_one = -1.0; 1475 PetscReal value; 1476 PetscInt n_D,n_R; 1477 PetscBool use_exact,use_exact_reduced,issbaij; 1478 PetscErrorCode ierr; 1479 /* prefixes stuff */ 1480 char dir_prefix[256],neu_prefix[256],str_level[16]; 1481 size_t len; 1482 1483 PetscFunctionBegin; 1484 1485 /* compute prefixes */ 1486 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1487 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1488 if (!pcbddc->current_level) { 1489 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1490 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1491 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1492 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1493 } else { 1494 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1495 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1496 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1497 len -= 15; /* remove "pc_bddc_coarse_" */ 1498 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1499 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1500 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1501 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1502 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1503 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1504 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1505 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1506 } 1507 1508 /* DIRICHLET PROBLEM */ 1509 if (dirichlet) { 1510 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1511 if (pcbddc->local_mat->symmetric_set) { 1512 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1513 } 1514 /* Matrix for Dirichlet problem is pcis->A_II */ 1515 n_D = pcis->n - pcis->n_B; 1516 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1517 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1518 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1519 /* default */ 1520 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1521 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1522 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1523 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1524 if (issbaij) { 1525 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1526 } else { 1527 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1528 } 1529 /* Allow user's customization */ 1530 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1531 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1532 } 1533 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1534 if (sub_schurs->reuse_mumps) { 1535 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1536 1537 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1538 } 1539 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1540 if (!n_D) { 1541 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1542 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1543 } 1544 /* Set Up KSP for Dirichlet problem of BDDC */ 1545 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1546 /* set ksp_D into pcis data */ 1547 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1548 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1549 pcis->ksp_D = pcbddc->ksp_D; 1550 } 1551 1552 /* NEUMANN PROBLEM */ 1553 A_RR = 0; 1554 if (neumann) { 1555 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1556 PetscInt ibs,mbs; 1557 PetscBool issbaij; 1558 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1559 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1560 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1561 if (pcbddc->ksp_R) { /* already created ksp */ 1562 PetscInt nn_R; 1563 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1564 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1565 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1566 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1567 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1568 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1569 reuse = MAT_INITIAL_MATRIX; 1570 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1571 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1572 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1573 reuse = MAT_INITIAL_MATRIX; 1574 } else { /* safe to reuse the matrix */ 1575 reuse = MAT_REUSE_MATRIX; 1576 } 1577 } 1578 /* last check */ 1579 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1580 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1581 reuse = MAT_INITIAL_MATRIX; 1582 } 1583 } else { /* first time, so we need to create the matrix */ 1584 reuse = MAT_INITIAL_MATRIX; 1585 } 1586 /* extract A_RR */ 1587 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1588 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1589 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1590 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1591 if (matis->A == pcbddc->local_mat) { 1592 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1593 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1594 } else { 1595 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1596 } 1597 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1598 if (matis->A == pcbddc->local_mat) { 1599 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1600 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1601 } else { 1602 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1603 } 1604 } 1605 if (!sub_schurs->reuse_mumps) { 1606 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1607 if (pcbddc->local_mat->symmetric_set) { 1608 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1609 } 1610 } else { 1611 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1612 1613 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1614 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1615 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1616 } 1617 if (!pcbddc->ksp_R) { /* create object if not present */ 1618 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1619 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1620 /* default */ 1621 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1622 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1623 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1624 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1625 if (issbaij) { 1626 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1627 } else { 1628 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1629 } 1630 /* Allow user's customization */ 1631 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1632 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1633 } 1634 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1635 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1636 if (!n_R) { 1637 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1638 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1639 } 1640 /* Reuse MUMPS solver if it is present */ 1641 if (sub_schurs->reuse_mumps) { 1642 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1643 1644 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1645 } 1646 /* Set Up KSP for Neumann problem of BDDC */ 1647 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1648 } 1649 /* free Neumann problem's matrix */ 1650 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1651 1652 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1653 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1654 if (pcbddc->dbg_flag) { 1655 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1656 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1658 } 1659 if (dirichlet) { /* Dirichlet */ 1660 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1661 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1662 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1663 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1664 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1665 /* need to be adapted? */ 1666 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1667 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1668 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1669 /* print info */ 1670 if (pcbddc->dbg_flag) { 1671 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1672 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1673 } 1674 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1675 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1676 } 1677 } 1678 if (neumann) { /* Neumann */ 1679 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1680 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1681 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1682 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1683 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1684 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1685 /* need to be adapted? */ 1686 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1687 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1688 /* print info */ 1689 if (pcbddc->dbg_flag) { 1690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1691 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1692 } 1693 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1694 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1695 } 1696 } 1697 } 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1703 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1704 { 1705 PetscErrorCode ierr; 1706 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1707 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1708 1709 PetscFunctionBegin; 1710 if (!sub_schurs->reuse_mumps) { 1711 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1712 } 1713 if (!pcbddc->switch_static) { 1714 if (applytranspose && pcbddc->local_auxmat1) { 1715 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1716 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1717 } 1718 if (!sub_schurs->reuse_mumps) { 1719 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1720 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1721 } else { 1722 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1723 1724 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1726 } 1727 } else { 1728 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1729 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1730 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1731 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1732 if (applytranspose && pcbddc->local_auxmat1) { 1733 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1734 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1735 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1736 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1737 } 1738 } 1739 if (!sub_schurs->reuse_mumps) { 1740 if (applytranspose) { 1741 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1742 } else { 1743 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1744 } 1745 } else { 1746 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1747 1748 if (applytranspose) { 1749 ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1750 } else { 1751 ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1752 } 1753 } 1754 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1755 if (!pcbddc->switch_static) { 1756 if (!sub_schurs->reuse_mumps) { 1757 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1758 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1759 } else { 1760 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1761 1762 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1763 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1764 } 1765 if (!applytranspose && pcbddc->local_auxmat1) { 1766 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1767 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1768 } 1769 } else { 1770 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1771 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1772 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1773 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1774 if (!applytranspose && pcbddc->local_auxmat1) { 1775 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1776 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1777 } 1778 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1781 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1782 } 1783 PetscFunctionReturn(0); 1784 } 1785 1786 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1787 #undef __FUNCT__ 1788 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1789 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1790 { 1791 PetscErrorCode ierr; 1792 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1793 PC_IS* pcis = (PC_IS*) (pc->data); 1794 const PetscScalar zero = 0.0; 1795 1796 PetscFunctionBegin; 1797 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1798 if (applytranspose) { 1799 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1800 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1801 } else { 1802 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1803 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1804 } 1805 /* start communications from local primal nodes to rhs of coarse solver */ 1806 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1807 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1808 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1809 1810 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1811 /* TODO remove null space when doing multilevel */ 1812 if (pcbddc->coarse_ksp) { 1813 Vec rhs,sol; 1814 1815 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1816 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1817 if (applytranspose) { 1818 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1819 } else { 1820 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1821 } 1822 } 1823 1824 /* Local solution on R nodes */ 1825 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1826 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1827 } 1828 1829 /* communications from coarse sol to local primal nodes */ 1830 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1831 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1832 1833 /* Sum contributions from two levels */ 1834 if (applytranspose) { 1835 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1836 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1837 } else { 1838 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1839 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1840 } 1841 PetscFunctionReturn(0); 1842 } 1843 1844 #undef __FUNCT__ 1845 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1846 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1847 { 1848 PetscErrorCode ierr; 1849 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1850 PetscScalar *array; 1851 Vec from,to; 1852 1853 PetscFunctionBegin; 1854 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1855 from = pcbddc->coarse_vec; 1856 to = pcbddc->vec1_P; 1857 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1858 Vec tvec; 1859 1860 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1861 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1862 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1863 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1864 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1865 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1866 } 1867 } else { /* from local to global -> put data in coarse right hand side */ 1868 from = pcbddc->vec1_P; 1869 to = pcbddc->coarse_vec; 1870 } 1871 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1877 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1878 { 1879 PetscErrorCode ierr; 1880 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1881 PetscScalar *array; 1882 Vec from,to; 1883 1884 PetscFunctionBegin; 1885 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1886 from = pcbddc->coarse_vec; 1887 to = pcbddc->vec1_P; 1888 } else { /* from local to global -> put data in coarse right hand side */ 1889 from = pcbddc->vec1_P; 1890 to = pcbddc->coarse_vec; 1891 } 1892 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1893 if (smode == SCATTER_FORWARD) { 1894 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1895 Vec tvec; 1896 1897 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1898 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1899 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1900 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1901 } 1902 } else { 1903 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1904 ierr = VecResetArray(from);CHKERRQ(ierr); 1905 } 1906 } 1907 PetscFunctionReturn(0); 1908 } 1909 1910 /* uncomment for testing purposes */ 1911 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1914 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1915 { 1916 PetscErrorCode ierr; 1917 PC_IS* pcis = (PC_IS*)(pc->data); 1918 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1919 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1920 /* one and zero */ 1921 PetscScalar one=1.0,zero=0.0; 1922 /* space to store constraints and their local indices */ 1923 PetscScalar *constraints_data; 1924 PetscInt *constraints_idxs,*constraints_idxs_B; 1925 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1926 PetscInt *constraints_n; 1927 /* iterators */ 1928 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1929 /* BLAS integers */ 1930 PetscBLASInt lwork,lierr; 1931 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1932 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1933 /* reuse */ 1934 PetscInt olocal_primal_size,olocal_primal_size_cc; 1935 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1936 /* change of basis */ 1937 PetscBool qr_needed; 1938 PetscBT change_basis,qr_needed_idx; 1939 /* auxiliary stuff */ 1940 PetscInt *nnz,*is_indices; 1941 PetscInt ncc; 1942 /* some quantities */ 1943 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1944 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1945 1946 PetscFunctionBegin; 1947 /* Destroy Mat objects computed previously */ 1948 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1949 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1950 /* save info on constraints from previous setup (if any) */ 1951 olocal_primal_size = pcbddc->local_primal_size; 1952 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1953 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1954 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1955 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1956 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1957 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1958 1959 /* print some info */ 1960 if (pcbddc->dbg_flag) { 1961 IS vertices; 1962 PetscInt nv,nedges,nfaces; 1963 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1964 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1965 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1966 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1967 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1968 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1969 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1971 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1972 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1973 } 1974 1975 if (!pcbddc->adaptive_selection) { 1976 IS ISForVertices,*ISForFaces,*ISForEdges; 1977 MatNullSpace nearnullsp; 1978 const Vec *nearnullvecs; 1979 Vec *localnearnullsp; 1980 PetscScalar *array; 1981 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1982 PetscBool nnsp_has_cnst; 1983 /* LAPACK working arrays for SVD or POD */ 1984 PetscBool skip_lapack,boolforchange; 1985 PetscScalar *work; 1986 PetscReal *singular_vals; 1987 #if defined(PETSC_USE_COMPLEX) 1988 PetscReal *rwork; 1989 #endif 1990 #if defined(PETSC_MISSING_LAPACK_GESVD) 1991 PetscScalar *temp_basis,*correlation_mat; 1992 #else 1993 PetscBLASInt dummy_int=1; 1994 PetscScalar dummy_scalar=1.; 1995 #endif 1996 1997 /* Get index sets for faces, edges and vertices from graph */ 1998 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1999 /* free unneeded index sets */ 2000 if (!pcbddc->use_vertices) { 2001 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2002 } 2003 if (!pcbddc->use_edges) { 2004 for (i=0;i<n_ISForEdges;i++) { 2005 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2006 } 2007 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2008 n_ISForEdges = 0; 2009 } 2010 if (!pcbddc->use_faces) { 2011 for (i=0;i<n_ISForFaces;i++) { 2012 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2013 } 2014 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2015 n_ISForFaces = 0; 2016 } 2017 2018 #if defined(PETSC_USE_DEBUG) 2019 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2020 Also use_change_of_basis should be consistent among processors */ 2021 if (pcbddc->NullSpace) { 2022 PetscBool tbool[2],gbool[2]; 2023 2024 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2025 pcbddc->use_change_of_basis = PETSC_TRUE; 2026 if (!ISForEdges) { 2027 pcbddc->use_change_on_faces = PETSC_TRUE; 2028 } 2029 } 2030 tbool[0] = pcbddc->use_change_of_basis; 2031 tbool[1] = pcbddc->use_change_on_faces; 2032 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2033 pcbddc->use_change_of_basis = gbool[0]; 2034 pcbddc->use_change_on_faces = gbool[1]; 2035 } 2036 #endif 2037 2038 /* check if near null space is attached to global mat */ 2039 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2040 if (nearnullsp) { 2041 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2042 /* remove any stored info */ 2043 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2044 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2045 /* store information for BDDC solver reuse */ 2046 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2047 pcbddc->onearnullspace = nearnullsp; 2048 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2049 for (i=0;i<nnsp_size;i++) { 2050 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2051 } 2052 } else { /* if near null space is not provided BDDC uses constants by default */ 2053 nnsp_size = 0; 2054 nnsp_has_cnst = PETSC_TRUE; 2055 } 2056 /* get max number of constraints on a single cc */ 2057 max_constraints = nnsp_size; 2058 if (nnsp_has_cnst) max_constraints++; 2059 2060 /* 2061 Evaluate maximum storage size needed by the procedure 2062 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2063 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2064 There can be multiple constraints per connected component 2065 */ 2066 n_vertices = 0; 2067 if (ISForVertices) { 2068 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2069 } 2070 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2071 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2072 2073 total_counts = n_ISForFaces+n_ISForEdges; 2074 total_counts *= max_constraints; 2075 total_counts += n_vertices; 2076 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2077 2078 total_counts = 0; 2079 max_size_of_constraint = 0; 2080 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2081 IS used_is; 2082 if (i<n_ISForEdges) { 2083 used_is = ISForEdges[i]; 2084 } else { 2085 used_is = ISForFaces[i-n_ISForEdges]; 2086 } 2087 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2088 total_counts += j; 2089 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2090 } 2091 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2092 2093 /* get local part of global near null space vectors */ 2094 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2095 for (k=0;k<nnsp_size;k++) { 2096 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2097 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2098 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2099 } 2100 2101 /* whether or not to skip lapack calls */ 2102 skip_lapack = PETSC_TRUE; 2103 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2104 2105 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2106 if (!skip_lapack) { 2107 PetscScalar temp_work; 2108 2109 #if defined(PETSC_MISSING_LAPACK_GESVD) 2110 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2111 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2112 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2113 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2114 #if defined(PETSC_USE_COMPLEX) 2115 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2116 #endif 2117 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2118 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2119 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2120 lwork = -1; 2121 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2122 #if !defined(PETSC_USE_COMPLEX) 2123 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2124 #else 2125 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2126 #endif 2127 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2128 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2129 #else /* on missing GESVD */ 2130 /* SVD */ 2131 PetscInt max_n,min_n; 2132 max_n = max_size_of_constraint; 2133 min_n = max_constraints; 2134 if (max_size_of_constraint < max_constraints) { 2135 min_n = max_size_of_constraint; 2136 max_n = max_constraints; 2137 } 2138 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2139 #if defined(PETSC_USE_COMPLEX) 2140 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2141 #endif 2142 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2143 lwork = -1; 2144 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2145 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2146 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2147 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2148 #if !defined(PETSC_USE_COMPLEX) 2149 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2150 #else 2151 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2152 #endif 2153 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2154 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2155 #endif /* on missing GESVD */ 2156 /* Allocate optimal workspace */ 2157 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2158 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2159 } 2160 /* Now we can loop on constraining sets */ 2161 total_counts = 0; 2162 constraints_idxs_ptr[0] = 0; 2163 constraints_data_ptr[0] = 0; 2164 /* vertices */ 2165 if (n_vertices) { 2166 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2167 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2168 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2169 for (i=0;i<n_vertices;i++) { 2170 constraints_n[total_counts] = 1; 2171 constraints_data[total_counts] = 1.0; 2172 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2173 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2174 total_counts++; 2175 } 2176 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2177 PetscBool used_vertex; 2178 for (i=0;i<n_vertices;i++) { 2179 used_vertex = PETSC_FALSE; 2180 k = 0; 2181 while (!used_vertex && k<nnsp_size) { 2182 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2183 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2184 constraints_n[total_counts] = 1; 2185 constraints_idxs[total_counts] = is_indices[i]; 2186 constraints_data[total_counts] = 1.0; 2187 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2188 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2189 total_counts++; 2190 used_vertex = PETSC_TRUE; 2191 } 2192 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2193 k++; 2194 } 2195 } 2196 } 2197 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2198 n_vertices = total_counts; 2199 } 2200 2201 /* edges and faces */ 2202 total_counts_cc = total_counts; 2203 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2204 IS used_is; 2205 PetscBool idxs_copied = PETSC_FALSE; 2206 2207 if (ncc<n_ISForEdges) { 2208 used_is = ISForEdges[ncc]; 2209 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2210 } else { 2211 used_is = ISForFaces[ncc-n_ISForEdges]; 2212 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2213 } 2214 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2215 2216 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2217 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2218 /* change of basis should not be performed on local periodic nodes */ 2219 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2220 if (nnsp_has_cnst) { 2221 PetscScalar quad_value; 2222 2223 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2224 idxs_copied = PETSC_TRUE; 2225 2226 if (!pcbddc->use_nnsp_true) { 2227 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2228 } else { 2229 quad_value = 1.0; 2230 } 2231 for (j=0;j<size_of_constraint;j++) { 2232 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2233 } 2234 temp_constraints++; 2235 total_counts++; 2236 } 2237 for (k=0;k<nnsp_size;k++) { 2238 PetscReal real_value; 2239 PetscScalar *ptr_to_data; 2240 2241 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2242 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2243 for (j=0;j<size_of_constraint;j++) { 2244 ptr_to_data[j] = array[is_indices[j]]; 2245 } 2246 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2247 /* check if array is null on the connected component */ 2248 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2249 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2250 if (real_value > 0.0) { /* keep indices and values */ 2251 temp_constraints++; 2252 total_counts++; 2253 if (!idxs_copied) { 2254 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2255 idxs_copied = PETSC_TRUE; 2256 } 2257 } 2258 } 2259 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2260 valid_constraints = temp_constraints; 2261 if (!pcbddc->use_nnsp_true && temp_constraints) { 2262 if (temp_constraints == 1) { /* just normalize the constraint */ 2263 PetscScalar norm,*ptr_to_data; 2264 2265 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2266 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2267 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2268 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2269 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2270 } else { /* perform SVD */ 2271 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2272 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2273 2274 #if defined(PETSC_MISSING_LAPACK_GESVD) 2275 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2276 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2277 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2278 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2279 from that computed using LAPACKgesvd 2280 -> This is due to a different computation of eigenvectors in LAPACKheev 2281 -> The quality of the POD-computed basis will be the same */ 2282 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2283 /* Store upper triangular part of correlation matrix */ 2284 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2285 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2286 for (j=0;j<temp_constraints;j++) { 2287 for (k=0;k<j+1;k++) { 2288 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2289 } 2290 } 2291 /* compute eigenvalues and eigenvectors of correlation matrix */ 2292 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2293 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2294 #if !defined(PETSC_USE_COMPLEX) 2295 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2296 #else 2297 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2298 #endif 2299 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2300 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2301 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2302 j = 0; 2303 while (j < temp_constraints && singular_vals[j] < tol) j++; 2304 total_counts = total_counts-j; 2305 valid_constraints = temp_constraints-j; 2306 /* scale and copy POD basis into used quadrature memory */ 2307 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2308 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2309 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2310 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2311 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2312 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2313 if (j<temp_constraints) { 2314 PetscInt ii; 2315 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2316 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2317 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2318 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2319 for (k=0;k<temp_constraints-j;k++) { 2320 for (ii=0;ii<size_of_constraint;ii++) { 2321 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2322 } 2323 } 2324 } 2325 #else /* on missing GESVD */ 2326 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2327 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2328 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2329 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2330 #if !defined(PETSC_USE_COMPLEX) 2331 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2332 #else 2333 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2334 #endif 2335 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2336 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2337 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2338 k = temp_constraints; 2339 if (k > size_of_constraint) k = size_of_constraint; 2340 j = 0; 2341 while (j < k && singular_vals[k-j-1] < tol) j++; 2342 valid_constraints = k-j; 2343 total_counts = total_counts-temp_constraints+valid_constraints; 2344 #endif /* on missing GESVD */ 2345 } 2346 } 2347 /* update pointers information */ 2348 if (valid_constraints) { 2349 constraints_n[total_counts_cc] = valid_constraints; 2350 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2351 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2352 /* set change_of_basis flag */ 2353 if (boolforchange) { 2354 PetscBTSet(change_basis,total_counts_cc); 2355 } 2356 total_counts_cc++; 2357 } 2358 } 2359 /* free workspace */ 2360 if (!skip_lapack) { 2361 ierr = PetscFree(work);CHKERRQ(ierr); 2362 #if defined(PETSC_USE_COMPLEX) 2363 ierr = PetscFree(rwork);CHKERRQ(ierr); 2364 #endif 2365 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2366 #if defined(PETSC_MISSING_LAPACK_GESVD) 2367 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2368 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2369 #endif 2370 } 2371 for (k=0;k<nnsp_size;k++) { 2372 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2373 } 2374 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2375 /* free index sets of faces, edges and vertices */ 2376 for (i=0;i<n_ISForFaces;i++) { 2377 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2378 } 2379 if (n_ISForFaces) { 2380 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2381 } 2382 for (i=0;i<n_ISForEdges;i++) { 2383 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2384 } 2385 if (n_ISForEdges) { 2386 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2387 } 2388 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2389 } else { 2390 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2391 2392 total_counts = 0; 2393 n_vertices = 0; 2394 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2395 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2396 } 2397 max_constraints = 0; 2398 total_counts_cc = 0; 2399 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2400 total_counts += pcbddc->adaptive_constraints_n[i]; 2401 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2402 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2403 } 2404 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2405 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2406 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2407 constraints_data = pcbddc->adaptive_constraints_data; 2408 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2409 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2410 total_counts_cc = 0; 2411 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2412 if (pcbddc->adaptive_constraints_n[i]) { 2413 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2414 } 2415 } 2416 #if 0 2417 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2418 for (i=0;i<total_counts_cc;i++) { 2419 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2420 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2421 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2422 printf(" %d",constraints_idxs[j]); 2423 } 2424 printf("\n"); 2425 printf("number of cc: %d\n",constraints_n[i]); 2426 } 2427 for (i=0;i<n_vertices;i++) { 2428 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2429 } 2430 for (i=0;i<sub_schurs->n_subs;i++) { 2431 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2432 } 2433 #endif 2434 2435 max_size_of_constraint = 0; 2436 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2437 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2438 /* Change of basis */ 2439 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2440 if (pcbddc->use_change_of_basis) { 2441 for (i=0;i<sub_schurs->n_subs;i++) { 2442 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2443 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2444 } 2445 } 2446 } 2447 } 2448 pcbddc->local_primal_size = total_counts; 2449 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2450 2451 /* map constraints_idxs in boundary numbering */ 2452 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2453 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 2454 2455 /* Create constraint matrix */ 2456 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2457 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2458 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2459 2460 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2461 /* determine if a QR strategy is needed for change of basis */ 2462 qr_needed = PETSC_FALSE; 2463 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2464 total_primal_vertices=0; 2465 pcbddc->local_primal_size_cc = 0; 2466 for (i=0;i<total_counts_cc;i++) { 2467 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2468 if (size_of_constraint == 1) { 2469 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2470 pcbddc->local_primal_size_cc += 1; 2471 } else if (PetscBTLookup(change_basis,i)) { 2472 for (k=0;k<constraints_n[i];k++) { 2473 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2474 } 2475 pcbddc->local_primal_size_cc += constraints_n[i]; 2476 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2477 PetscBTSet(qr_needed_idx,i); 2478 qr_needed = PETSC_TRUE; 2479 } 2480 } else { 2481 pcbddc->local_primal_size_cc += 1; 2482 } 2483 } 2484 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2485 pcbddc->n_vertices = total_primal_vertices; 2486 /* permute indices in order to have a sorted set of vertices */ 2487 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2488 2489 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2490 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2491 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2492 2493 /* nonzero structure of constraint matrix */ 2494 /* and get reference dof for local constraints */ 2495 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2496 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2497 2498 j = total_primal_vertices; 2499 total_counts = total_primal_vertices; 2500 cum = total_primal_vertices; 2501 for (i=n_vertices;i<total_counts_cc;i++) { 2502 if (!PetscBTLookup(change_basis,i)) { 2503 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2504 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2505 cum++; 2506 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2507 for (k=0;k<constraints_n[i];k++) { 2508 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2509 nnz[j+k] = size_of_constraint; 2510 } 2511 j += constraints_n[i]; 2512 } 2513 } 2514 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2515 ierr = PetscFree(nnz);CHKERRQ(ierr); 2516 2517 /* set values in constraint matrix */ 2518 for (i=0;i<total_primal_vertices;i++) { 2519 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2520 } 2521 total_counts = total_primal_vertices; 2522 for (i=n_vertices;i<total_counts_cc;i++) { 2523 if (!PetscBTLookup(change_basis,i)) { 2524 PetscInt *cols; 2525 2526 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2527 cols = constraints_idxs+constraints_idxs_ptr[i]; 2528 for (k=0;k<constraints_n[i];k++) { 2529 PetscInt row = total_counts+k; 2530 PetscScalar *vals; 2531 2532 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2533 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2534 } 2535 total_counts += constraints_n[i]; 2536 } 2537 } 2538 /* assembling */ 2539 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2540 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2541 2542 /* 2543 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2544 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2545 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 2546 */ 2547 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2548 if (pcbddc->use_change_of_basis) { 2549 /* dual and primal dofs on a single cc */ 2550 PetscInt dual_dofs,primal_dofs; 2551 /* working stuff for GEQRF */ 2552 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2553 PetscBLASInt lqr_work; 2554 /* working stuff for UNGQR */ 2555 PetscScalar *gqr_work,lgqr_work_t; 2556 PetscBLASInt lgqr_work; 2557 /* working stuff for TRTRS */ 2558 PetscScalar *trs_rhs; 2559 PetscBLASInt Blas_NRHS; 2560 /* pointers for values insertion into change of basis matrix */ 2561 PetscInt *start_rows,*start_cols; 2562 PetscScalar *start_vals; 2563 /* working stuff for values insertion */ 2564 PetscBT is_primal; 2565 PetscInt *aux_primal_numbering_B; 2566 /* matrix sizes */ 2567 PetscInt global_size,local_size; 2568 /* temporary change of basis */ 2569 Mat localChangeOfBasisMatrix; 2570 /* extra space for debugging */ 2571 PetscScalar *dbg_work; 2572 2573 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2574 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2575 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2576 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2577 /* nonzeros for local mat */ 2578 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2579 for (i=0;i<pcis->n;i++) nnz[i]=1; 2580 for (i=n_vertices;i<total_counts_cc;i++) { 2581 if (PetscBTLookup(change_basis,i)) { 2582 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2583 if (PetscBTLookup(qr_needed_idx,i)) { 2584 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2585 } else { 2586 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2587 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2588 } 2589 } 2590 } 2591 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2592 ierr = PetscFree(nnz);CHKERRQ(ierr); 2593 /* Set initial identity in the matrix */ 2594 for (i=0;i<pcis->n;i++) { 2595 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2596 } 2597 2598 if (pcbddc->dbg_flag) { 2599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2600 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2601 } 2602 2603 2604 /* Now we loop on the constraints which need a change of basis */ 2605 /* 2606 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2607 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2608 2609 Basic blocks of change of basis matrix T computed by 2610 2611 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2612 2613 | 1 0 ... 0 s_1/S | 2614 | 0 1 ... 0 s_2/S | 2615 | ... | 2616 | 0 ... 1 s_{n-1}/S | 2617 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2618 2619 with S = \sum_{i=1}^n s_i^2 2620 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2621 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2622 2623 - QR decomposition of constraints otherwise 2624 */ 2625 if (qr_needed) { 2626 /* space to store Q */ 2627 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2628 /* first we issue queries for optimal work */ 2629 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2630 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2631 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2632 lqr_work = -1; 2633 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2634 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2635 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2636 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2637 lgqr_work = -1; 2638 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2639 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2640 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2641 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2642 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2643 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2644 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2645 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2646 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2647 /* array to store scaling factors for reflectors */ 2648 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2649 /* array to store rhs and solution of triangular solver */ 2650 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2651 /* allocating workspace for check */ 2652 if (pcbddc->dbg_flag) { 2653 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2654 } 2655 } 2656 /* array to store whether a node is primal or not */ 2657 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2658 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2659 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2660 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 2661 for (i=0;i<total_primal_vertices;i++) { 2662 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2663 } 2664 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2665 2666 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2667 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2668 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2669 if (PetscBTLookup(change_basis,total_counts)) { 2670 /* get constraint info */ 2671 primal_dofs = constraints_n[total_counts]; 2672 dual_dofs = size_of_constraint-primal_dofs; 2673 2674 if (pcbddc->dbg_flag) { 2675 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2676 } 2677 2678 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2679 2680 /* copy quadrature constraints for change of basis check */ 2681 if (pcbddc->dbg_flag) { 2682 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2683 } 2684 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2685 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2686 2687 /* compute QR decomposition of constraints */ 2688 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2689 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2690 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2691 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2692 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2693 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2694 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2695 2696 /* explictly compute R^-T */ 2697 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2698 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2699 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2700 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2701 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2702 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2703 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2704 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2705 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2706 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2707 2708 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2709 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2710 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2711 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2712 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2713 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2714 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2715 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2716 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2717 2718 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2719 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2720 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2721 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2722 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2723 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2725 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2726 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2727 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2728 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2729 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2730 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2731 2732 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2733 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2734 /* insert cols for primal dofs */ 2735 for (j=0;j<primal_dofs;j++) { 2736 start_vals = &qr_basis[j*size_of_constraint]; 2737 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2738 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2739 } 2740 /* insert cols for dual dofs */ 2741 for (j=0,k=0;j<dual_dofs;k++) { 2742 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2743 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2744 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2745 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2746 j++; 2747 } 2748 } 2749 2750 /* check change of basis */ 2751 if (pcbddc->dbg_flag) { 2752 PetscInt ii,jj; 2753 PetscBool valid_qr=PETSC_TRUE; 2754 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2755 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2756 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2757 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2758 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2759 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2760 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2761 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2762 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2763 for (jj=0;jj<size_of_constraint;jj++) { 2764 for (ii=0;ii<primal_dofs;ii++) { 2765 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2766 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2767 } 2768 } 2769 if (!valid_qr) { 2770 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2771 for (jj=0;jj<size_of_constraint;jj++) { 2772 for (ii=0;ii<primal_dofs;ii++) { 2773 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2774 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2775 } 2776 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2777 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2778 } 2779 } 2780 } 2781 } else { 2782 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2783 } 2784 } 2785 } else { /* simple transformation block */ 2786 PetscInt row,col; 2787 PetscScalar val,norm; 2788 2789 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2790 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2791 for (j=0;j<size_of_constraint;j++) { 2792 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2793 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2794 if (!PetscBTLookup(is_primal,row_B)) { 2795 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2796 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2797 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2798 } else { 2799 for (k=0;k<size_of_constraint;k++) { 2800 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2801 if (row != col) { 2802 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2803 } else { 2804 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2805 } 2806 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2807 } 2808 } 2809 } 2810 if (pcbddc->dbg_flag) { 2811 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2812 } 2813 } 2814 } else { 2815 if (pcbddc->dbg_flag) { 2816 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2817 } 2818 } 2819 } 2820 2821 /* free workspace */ 2822 if (qr_needed) { 2823 if (pcbddc->dbg_flag) { 2824 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2825 } 2826 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2827 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2828 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2829 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2830 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2831 } 2832 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2833 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2834 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2835 2836 /* assembling of global change of variable */ 2837 { 2838 Mat tmat; 2839 PetscInt bs; 2840 2841 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2842 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2843 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2844 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2845 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2846 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2847 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2848 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2849 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2850 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2851 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2852 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2853 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2854 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2855 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2856 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2857 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2858 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2859 } 2860 /* check */ 2861 if (pcbddc->dbg_flag) { 2862 PetscReal error; 2863 Vec x,x_change; 2864 2865 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2866 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2867 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2868 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2869 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2870 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2871 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2872 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2873 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2874 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2875 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2876 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2877 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2878 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2879 ierr = VecDestroy(&x);CHKERRQ(ierr); 2880 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2881 } 2882 2883 /* adapt sub_schurs computed (if any) */ 2884 if (pcbddc->use_deluxe_scaling) { 2885 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2886 if (sub_schurs->S_Ej_all) { 2887 Mat S_new,tmat; 2888 IS is_all_N; 2889 2890 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2891 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2892 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2893 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2894 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2895 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2896 sub_schurs->S_Ej_all = S_new; 2897 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2898 if (sub_schurs->sum_S_Ej_all) { 2899 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2900 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2901 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2902 sub_schurs->sum_S_Ej_all = S_new; 2903 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2904 } 2905 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2906 } 2907 } 2908 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2909 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2910 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2911 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2912 } 2913 2914 /* set up change of basis context */ 2915 if (pcbddc->ChangeOfBasisMatrix) { 2916 PCBDDCChange_ctx change_ctx; 2917 2918 if (!pcbddc->new_global_mat) { 2919 PetscInt global_size,local_size; 2920 2921 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2922 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2923 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2924 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2925 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2926 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2927 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2928 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2929 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2930 } else { 2931 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2932 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2933 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2934 } 2935 if (!pcbddc->user_ChangeOfBasisMatrix) { 2936 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2937 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2938 } else { 2939 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2940 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2941 } 2942 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2943 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2944 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2945 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2946 } 2947 2948 /* check if a new primal space has been introduced */ 2949 pcbddc->new_primal_space_local = PETSC_TRUE; 2950 if (olocal_primal_size == pcbddc->local_primal_size) { 2951 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2952 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2953 if (!pcbddc->new_primal_space_local) { 2954 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2955 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2956 } 2957 } 2958 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2959 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2960 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2961 2962 /* flush dbg viewer */ 2963 if (pcbddc->dbg_flag) { 2964 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2965 } 2966 2967 /* free workspace */ 2968 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2969 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2970 if (!pcbddc->adaptive_selection) { 2971 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2972 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2973 } else { 2974 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2975 pcbddc->adaptive_constraints_idxs_ptr, 2976 pcbddc->adaptive_constraints_data_ptr, 2977 pcbddc->adaptive_constraints_idxs, 2978 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2979 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2980 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2981 } 2982 PetscFunctionReturn(0); 2983 } 2984 2985 #undef __FUNCT__ 2986 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2987 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2988 { 2989 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2990 PC_IS *pcis = (PC_IS*)pc->data; 2991 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2992 PetscInt ierr,i,vertex_size,N; 2993 PetscViewer viewer=pcbddc->dbg_viewer; 2994 2995 PetscFunctionBegin; 2996 /* Reset previously computed graph */ 2997 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2998 /* Init local Graph struct */ 2999 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3000 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3001 3002 /* Check validity of the csr graph passed in by the user */ 3003 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3004 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3005 } 3006 3007 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3008 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3009 PetscInt *xadj,*adjncy; 3010 PetscInt nvtxs; 3011 PetscBool flg_row=PETSC_FALSE; 3012 3013 if (pcbddc->use_local_adj) { 3014 3015 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3016 if (flg_row) { 3017 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3018 pcbddc->computed_rowadj = PETSC_TRUE; 3019 } 3020 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3021 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3022 IS is_dummy; 3023 ISLocalToGlobalMapping l2gmap_dummy; 3024 PetscInt j,sum; 3025 PetscInt *cxadj,*cadjncy; 3026 const PetscInt *idxs; 3027 PCBDDCGraph graph; 3028 PetscBT is_on_boundary; 3029 3030 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3031 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3032 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3033 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3034 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3035 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3036 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3037 if (flg_row) { 3038 graph->xadj = xadj; 3039 graph->adjncy = adjncy; 3040 } 3041 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3042 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3043 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3044 3045 if (pcbddc->dbg_flag) { 3046 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3047 for (i=0;i<graph->ncc;i++) { 3048 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3049 } 3050 } 3051 3052 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3053 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3054 for (i=0;i<pcis->n_B;i++) { 3055 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3056 } 3057 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3058 3059 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3060 sum = 0; 3061 for (i=0;i<graph->ncc;i++) { 3062 PetscInt sizecc = 0; 3063 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3064 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3065 sizecc++; 3066 } 3067 } 3068 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3069 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3070 cxadj[graph->queue[j]] = sizecc; 3071 } 3072 } 3073 sum += sizecc*sizecc; 3074 } 3075 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3076 sum = 0; 3077 for (i=0;i<pcis->n;i++) { 3078 PetscInt temp = cxadj[i]; 3079 cxadj[i] = sum; 3080 sum += temp; 3081 } 3082 cxadj[pcis->n] = sum; 3083 for (i=0;i<graph->ncc;i++) { 3084 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3085 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3086 PetscInt k,sizecc = 0; 3087 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3088 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3089 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3090 sizecc++; 3091 } 3092 } 3093 } 3094 } 3095 } 3096 if (sum) { 3097 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3098 } else { 3099 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3100 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3101 } 3102 graph->xadj = 0; 3103 graph->adjncy = 0; 3104 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3105 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3106 } 3107 } 3108 if (pcbddc->dbg_flag) { 3109 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3110 } 3111 3112 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3113 vertex_size = 1; 3114 if (pcbddc->user_provided_isfordofs) { 3115 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3116 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3117 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3118 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3119 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3120 } 3121 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3122 pcbddc->n_ISForDofs = 0; 3123 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3124 } 3125 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3126 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3127 } else { 3128 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3129 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3130 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3131 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3132 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3133 } 3134 } 3135 } 3136 3137 /* Setup of Graph */ 3138 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3139 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3140 } 3141 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3142 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3143 } 3144 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3145 3146 /* Graph's connected components analysis */ 3147 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3148 3149 /* print some info to stdout */ 3150 if (pcbddc->dbg_flag) { 3151 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3152 } 3153 3154 /* mark topography has done */ 3155 pcbddc->recompute_topography = PETSC_FALSE; 3156 PetscFunctionReturn(0); 3157 } 3158 3159 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3160 #undef __FUNCT__ 3161 #define __FUNCT__ "PCBDDCSubsetNumbering" 3162 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3163 { 3164 PetscSF sf; 3165 PetscLayout map; 3166 const PetscInt *idxs; 3167 PetscInt *leaf_data,*root_data,*gidxs; 3168 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3169 PetscInt n_n,nlocals,start,first_index; 3170 PetscMPIInt commsize; 3171 PetscBool first_found; 3172 PetscErrorCode ierr; 3173 3174 PetscFunctionBegin; 3175 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3176 if (subset_mult) { 3177 PetscCheckSameComm(subset,1,subset_mult,2); 3178 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3179 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3180 } 3181 /* create workspace layout for computing global indices of subset */ 3182 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3183 lbounds[0] = lbounds[1] = 0; 3184 for (i=0;i<n;i++) { 3185 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3186 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3187 } 3188 lbounds[0] = -lbounds[0]; 3189 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3190 gbounds[0] = -gbounds[0]; 3191 N = gbounds[1] - gbounds[0] + 1; 3192 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3193 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3194 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3195 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3196 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3197 3198 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3199 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3200 if (subset_mult) { 3201 const PetscInt* idxs_mult; 3202 3203 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3204 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3205 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3206 } else { 3207 for (i=0;i<n;i++) leaf_data[i] = 1; 3208 } 3209 /* local size of new subset */ 3210 n_n = 0; 3211 for (i=0;i<n;i++) n_n += leaf_data[i]; 3212 3213 /* global indexes in layout */ 3214 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3215 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3216 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3217 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3218 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3219 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3220 3221 /* reduce from leaves to roots */ 3222 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3223 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3224 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3225 3226 /* count indexes in local part of layout */ 3227 nlocals = 0; 3228 first_index = -1; 3229 first_found = PETSC_FALSE; 3230 for (i=0;i<Nl;i++) { 3231 if (!first_found && root_data[i]) { 3232 first_found = PETSC_TRUE; 3233 first_index = i; 3234 } 3235 nlocals += root_data[i]; 3236 } 3237 3238 /* cumulative of number of indexes and size of subset without holes */ 3239 #if defined(PETSC_HAVE_MPI_EXSCAN) 3240 start = 0; 3241 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3242 #else 3243 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3244 start = start-nlocals; 3245 #endif 3246 3247 if (N_n) { /* compute total size of new subset if requested */ 3248 *N_n = start + nlocals; 3249 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3250 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3251 } 3252 3253 /* adapt root data with cumulative */ 3254 if (first_found) { 3255 PetscInt old_index; 3256 3257 root_data[first_index] += start; 3258 old_index = first_index; 3259 for (i=first_index+1;i<Nl;i++) { 3260 if (root_data[i]) { 3261 root_data[i] += root_data[old_index]; 3262 old_index = i; 3263 } 3264 } 3265 } 3266 3267 /* from roots to leaves */ 3268 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3269 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3270 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3271 3272 /* create new IS with global indexes without holes */ 3273 if (subset_mult) { 3274 const PetscInt* idxs_mult; 3275 PetscInt cum; 3276 3277 cum = 0; 3278 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3279 for (i=0;i<n;i++) { 3280 PetscInt j; 3281 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3282 } 3283 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3284 } else { 3285 for (i=0;i<n;i++) { 3286 gidxs[i] = leaf_data[i]-1; 3287 } 3288 } 3289 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3290 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3291 PetscFunctionReturn(0); 3292 } 3293 3294 #undef __FUNCT__ 3295 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3296 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3297 { 3298 PetscInt i,j; 3299 PetscScalar *alphas; 3300 PetscErrorCode ierr; 3301 3302 PetscFunctionBegin; 3303 /* this implements stabilized Gram-Schmidt */ 3304 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3305 for (i=0;i<n;i++) { 3306 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3307 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3308 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3309 } 3310 ierr = PetscFree(alphas);CHKERRQ(ierr); 3311 PetscFunctionReturn(0); 3312 } 3313 3314 #undef __FUNCT__ 3315 #define __FUNCT__ "MatISGetSubassemblingPattern" 3316 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3317 { 3318 IS ranks_send_to; 3319 PetscInt n_neighs,*neighs,*n_shared,**shared; 3320 PetscMPIInt size,rank,color; 3321 PetscInt *xadj,*adjncy; 3322 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3323 PetscInt i,local_size,threshold=0; 3324 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3325 PetscSubcomm subcomm; 3326 PetscErrorCode ierr; 3327 3328 PetscFunctionBegin; 3329 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3330 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3331 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3332 3333 /* Get info on mapping */ 3334 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3335 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3336 3337 /* build local CSR graph of subdomains' connectivity */ 3338 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3339 xadj[0] = 0; 3340 xadj[1] = PetscMax(n_neighs-1,0); 3341 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3342 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3343 3344 if (threshold) { 3345 PetscInt xadj_count = 0; 3346 for (i=1;i<n_neighs;i++) { 3347 if (n_shared[i] > threshold) { 3348 adjncy[xadj_count] = neighs[i]; 3349 adjncy_wgt[xadj_count] = n_shared[i]; 3350 xadj_count++; 3351 } 3352 } 3353 xadj[1] = xadj_count; 3354 } else { 3355 if (xadj[1]) { 3356 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3357 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3358 } 3359 } 3360 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3361 if (use_square) { 3362 for (i=0;i<xadj[1];i++) { 3363 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3364 } 3365 } 3366 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3367 3368 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3369 3370 /* 3371 Restrict work on active processes only. 3372 */ 3373 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3374 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3375 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3376 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3377 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3378 if (color) { 3379 ierr = PetscFree(xadj);CHKERRQ(ierr); 3380 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3381 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3382 } else { 3383 Mat subdomain_adj; 3384 IS new_ranks,new_ranks_contig; 3385 MatPartitioning partitioner; 3386 PetscInt prank,rstart=0,rend=0; 3387 PetscInt *is_indices,*oldranks; 3388 PetscBool aggregate; 3389 3390 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3391 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3392 prank = rank; 3393 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3394 /* 3395 for (i=0;i<size;i++) { 3396 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3397 } 3398 */ 3399 for (i=0;i<xadj[1];i++) { 3400 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3401 } 3402 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3403 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3404 if (aggregate) { 3405 PetscInt lrows,row,ncols,*cols; 3406 PetscMPIInt nrank; 3407 PetscScalar *vals; 3408 3409 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3410 lrows = 0; 3411 if (nrank<redprocs) { 3412 lrows = size/redprocs; 3413 if (nrank<size%redprocs) lrows++; 3414 } 3415 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3416 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3417 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3418 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3419 row = nrank; 3420 ncols = xadj[1]-xadj[0]; 3421 cols = adjncy; 3422 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3423 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3424 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3425 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3426 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3427 ierr = PetscFree(xadj);CHKERRQ(ierr); 3428 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3429 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3430 ierr = PetscFree(vals);CHKERRQ(ierr); 3431 } else { 3432 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3433 } 3434 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3435 3436 /* Partition */ 3437 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3438 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3439 if (use_vwgt) { 3440 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3441 v_wgt[0] = local_size; 3442 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3443 } 3444 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3445 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3446 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3447 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3448 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3449 3450 /* renumber new_ranks to avoid "holes" in new set of processors */ 3451 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3452 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3453 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3454 if (!redprocs) { 3455 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3456 } else { 3457 PetscInt idxs[1]; 3458 PetscMPIInt tag; 3459 MPI_Request *reqs; 3460 3461 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3462 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3463 for (i=rstart;i<rend;i++) { 3464 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3465 } 3466 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3467 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3468 ierr = PetscFree(reqs);CHKERRQ(ierr); 3469 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3470 } 3471 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3472 /* clean up */ 3473 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3474 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3475 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3476 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3477 } 3478 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3479 3480 /* assemble parallel IS for sends */ 3481 i = 1; 3482 if (color) i=0; 3483 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3484 /* get back IS */ 3485 *is_sends = ranks_send_to; 3486 PetscFunctionReturn(0); 3487 } 3488 3489 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3490 3491 #undef __FUNCT__ 3492 #define __FUNCT__ "MatISSubassemble" 3493 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3494 { 3495 Mat local_mat; 3496 IS is_sends_internal; 3497 PetscInt rows,cols,new_local_rows; 3498 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3499 PetscBool ismatis,isdense,newisdense,destroy_mat; 3500 ISLocalToGlobalMapping l2gmap; 3501 PetscInt* l2gmap_indices; 3502 const PetscInt* is_indices; 3503 MatType new_local_type; 3504 /* buffers */ 3505 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3506 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3507 PetscInt *recv_buffer_idxs_local; 3508 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3509 /* MPI */ 3510 MPI_Comm comm,comm_n; 3511 PetscSubcomm subcomm; 3512 PetscMPIInt n_sends,n_recvs,commsize; 3513 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3514 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3515 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3516 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3517 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3518 PetscErrorCode ierr; 3519 3520 PetscFunctionBegin; 3521 /* TODO: add missing checks */ 3522 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3523 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3524 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3525 PetscValidLogicalCollectiveInt(mat,nis,7); 3526 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3527 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3528 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3529 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3530 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3531 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3532 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3533 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3534 PetscInt mrows,mcols,mnrows,mncols; 3535 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3536 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3537 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3538 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3539 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3540 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3541 } 3542 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3543 PetscValidLogicalCollectiveInt(mat,bs,0); 3544 /* prepare IS for sending if not provided */ 3545 if (!is_sends) { 3546 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3547 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3548 } else { 3549 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3550 is_sends_internal = is_sends; 3551 } 3552 3553 /* get comm */ 3554 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3555 3556 /* compute number of sends */ 3557 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3558 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3559 3560 /* compute number of receives */ 3561 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3562 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3563 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3564 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3565 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3566 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3567 ierr = PetscFree(iflags);CHKERRQ(ierr); 3568 3569 /* restrict comm if requested */ 3570 subcomm = 0; 3571 destroy_mat = PETSC_FALSE; 3572 if (restrict_comm) { 3573 PetscMPIInt color,subcommsize; 3574 3575 color = 0; 3576 if (restrict_full) { 3577 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3578 } else { 3579 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3580 } 3581 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3582 subcommsize = commsize - subcommsize; 3583 /* check if reuse has been requested */ 3584 if (reuse == MAT_REUSE_MATRIX) { 3585 if (*mat_n) { 3586 PetscMPIInt subcommsize2; 3587 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3588 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3589 comm_n = PetscObjectComm((PetscObject)*mat_n); 3590 } else { 3591 comm_n = PETSC_COMM_SELF; 3592 } 3593 } else { /* MAT_INITIAL_MATRIX */ 3594 PetscMPIInt rank; 3595 3596 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3597 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3598 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3599 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3600 comm_n = PetscSubcommChild(subcomm); 3601 } 3602 /* flag to destroy *mat_n if not significative */ 3603 if (color) destroy_mat = PETSC_TRUE; 3604 } else { 3605 comm_n = comm; 3606 } 3607 3608 /* prepare send/receive buffers */ 3609 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3610 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3611 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3612 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3613 if (nis) { 3614 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3615 } 3616 3617 /* Get data from local matrices */ 3618 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3619 /* TODO: See below some guidelines on how to prepare the local buffers */ 3620 /* 3621 send_buffer_vals should contain the raw values of the local matrix 3622 send_buffer_idxs should contain: 3623 - MatType_PRIVATE type 3624 - PetscInt size_of_l2gmap 3625 - PetscInt global_row_indices[size_of_l2gmap] 3626 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3627 */ 3628 else { 3629 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3630 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3631 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3632 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3633 send_buffer_idxs[1] = i; 3634 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3635 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3636 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3637 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3638 for (i=0;i<n_sends;i++) { 3639 ilengths_vals[is_indices[i]] = len*len; 3640 ilengths_idxs[is_indices[i]] = len+2; 3641 } 3642 } 3643 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3644 /* additional is (if any) */ 3645 if (nis) { 3646 PetscMPIInt psum; 3647 PetscInt j; 3648 for (j=0,psum=0;j<nis;j++) { 3649 PetscInt plen; 3650 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3651 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3652 psum += len+1; /* indices + lenght */ 3653 } 3654 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3655 for (j=0,psum=0;j<nis;j++) { 3656 PetscInt plen; 3657 const PetscInt *is_array_idxs; 3658 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3659 send_buffer_idxs_is[psum] = plen; 3660 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3661 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3662 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3663 psum += plen+1; /* indices + lenght */ 3664 } 3665 for (i=0;i<n_sends;i++) { 3666 ilengths_idxs_is[is_indices[i]] = psum; 3667 } 3668 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3669 } 3670 3671 buf_size_idxs = 0; 3672 buf_size_vals = 0; 3673 buf_size_idxs_is = 0; 3674 for (i=0;i<n_recvs;i++) { 3675 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3676 buf_size_vals += (PetscInt)olengths_vals[i]; 3677 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3678 } 3679 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3680 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3681 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3682 3683 /* get new tags for clean communications */ 3684 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3685 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3686 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3687 3688 /* allocate for requests */ 3689 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3690 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3691 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3692 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3693 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3694 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3695 3696 /* communications */ 3697 ptr_idxs = recv_buffer_idxs; 3698 ptr_vals = recv_buffer_vals; 3699 ptr_idxs_is = recv_buffer_idxs_is; 3700 for (i=0;i<n_recvs;i++) { 3701 source_dest = onodes[i]; 3702 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3703 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3704 ptr_idxs += olengths_idxs[i]; 3705 ptr_vals += olengths_vals[i]; 3706 if (nis) { 3707 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3708 ptr_idxs_is += olengths_idxs_is[i]; 3709 } 3710 } 3711 for (i=0;i<n_sends;i++) { 3712 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3713 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3714 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3715 if (nis) { 3716 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3717 } 3718 } 3719 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3720 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3721 3722 /* assemble new l2g map */ 3723 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3724 ptr_idxs = recv_buffer_idxs; 3725 new_local_rows = 0; 3726 for (i=0;i<n_recvs;i++) { 3727 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3728 ptr_idxs += olengths_idxs[i]; 3729 } 3730 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3731 ptr_idxs = recv_buffer_idxs; 3732 new_local_rows = 0; 3733 for (i=0;i<n_recvs;i++) { 3734 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3735 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3736 ptr_idxs += olengths_idxs[i]; 3737 } 3738 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3739 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3740 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3741 3742 /* infer new local matrix type from received local matrices type */ 3743 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3744 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3745 if (n_recvs) { 3746 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3747 ptr_idxs = recv_buffer_idxs; 3748 for (i=0;i<n_recvs;i++) { 3749 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3750 new_local_type_private = MATAIJ_PRIVATE; 3751 break; 3752 } 3753 ptr_idxs += olengths_idxs[i]; 3754 } 3755 switch (new_local_type_private) { 3756 case MATDENSE_PRIVATE: 3757 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3758 new_local_type = MATSEQAIJ; 3759 bs = 1; 3760 } else { /* if I receive only 1 dense matrix */ 3761 new_local_type = MATSEQDENSE; 3762 bs = 1; 3763 } 3764 break; 3765 case MATAIJ_PRIVATE: 3766 new_local_type = MATSEQAIJ; 3767 bs = 1; 3768 break; 3769 case MATBAIJ_PRIVATE: 3770 new_local_type = MATSEQBAIJ; 3771 break; 3772 case MATSBAIJ_PRIVATE: 3773 new_local_type = MATSEQSBAIJ; 3774 break; 3775 default: 3776 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3777 break; 3778 } 3779 } else { /* by default, new_local_type is seqdense */ 3780 new_local_type = MATSEQDENSE; 3781 bs = 1; 3782 } 3783 3784 /* create MATIS object if needed */ 3785 if (reuse == MAT_INITIAL_MATRIX) { 3786 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3787 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3788 } else { 3789 /* it also destroys the local matrices */ 3790 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3791 } 3792 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3793 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3794 3795 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3796 3797 /* Global to local map of received indices */ 3798 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3799 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3800 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3801 3802 /* restore attributes -> type of incoming data and its size */ 3803 buf_size_idxs = 0; 3804 for (i=0;i<n_recvs;i++) { 3805 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3806 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3807 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3808 } 3809 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3810 3811 /* set preallocation */ 3812 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3813 if (!newisdense) { 3814 PetscInt *new_local_nnz=0; 3815 3816 ptr_vals = recv_buffer_vals; 3817 ptr_idxs = recv_buffer_idxs_local; 3818 if (n_recvs) { 3819 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3820 } 3821 for (i=0;i<n_recvs;i++) { 3822 PetscInt j; 3823 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3824 for (j=0;j<*(ptr_idxs+1);j++) { 3825 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3826 } 3827 } else { 3828 /* TODO */ 3829 } 3830 ptr_idxs += olengths_idxs[i]; 3831 } 3832 if (new_local_nnz) { 3833 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3834 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3835 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3836 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3837 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3838 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3839 } else { 3840 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3841 } 3842 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3843 } else { 3844 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3845 } 3846 3847 /* set values */ 3848 ptr_vals = recv_buffer_vals; 3849 ptr_idxs = recv_buffer_idxs_local; 3850 for (i=0;i<n_recvs;i++) { 3851 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3852 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3853 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3854 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3855 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3856 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3857 } else { 3858 /* TODO */ 3859 } 3860 ptr_idxs += olengths_idxs[i]; 3861 ptr_vals += olengths_vals[i]; 3862 } 3863 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3864 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3865 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3866 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3867 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3868 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3869 3870 #if 0 3871 if (!restrict_comm) { /* check */ 3872 Vec lvec,rvec; 3873 PetscReal infty_error; 3874 3875 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3876 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3877 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3878 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3879 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3880 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3881 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3882 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3883 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3884 } 3885 #endif 3886 3887 /* assemble new additional is (if any) */ 3888 if (nis) { 3889 PetscInt **temp_idxs,*count_is,j,psum; 3890 3891 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3892 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3893 ptr_idxs = recv_buffer_idxs_is; 3894 psum = 0; 3895 for (i=0;i<n_recvs;i++) { 3896 for (j=0;j<nis;j++) { 3897 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3898 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3899 psum += plen; 3900 ptr_idxs += plen+1; /* shift pointer to received data */ 3901 } 3902 } 3903 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3904 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3905 for (i=1;i<nis;i++) { 3906 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3907 } 3908 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3909 ptr_idxs = recv_buffer_idxs_is; 3910 for (i=0;i<n_recvs;i++) { 3911 for (j=0;j<nis;j++) { 3912 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3913 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3914 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3915 ptr_idxs += plen+1; /* shift pointer to received data */ 3916 } 3917 } 3918 for (i=0;i<nis;i++) { 3919 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3920 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3921 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3922 } 3923 ierr = PetscFree(count_is);CHKERRQ(ierr); 3924 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3925 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3926 } 3927 /* free workspace */ 3928 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3929 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3930 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3931 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3932 if (isdense) { 3933 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3934 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3935 } else { 3936 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3937 } 3938 if (nis) { 3939 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3940 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3941 } 3942 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3943 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3944 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3945 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3946 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3947 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3948 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3949 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3950 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3951 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3952 ierr = PetscFree(onodes);CHKERRQ(ierr); 3953 if (nis) { 3954 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3955 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3956 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3957 } 3958 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3959 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3960 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3961 for (i=0;i<nis;i++) { 3962 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3963 } 3964 *mat_n = NULL; 3965 } 3966 PetscFunctionReturn(0); 3967 } 3968 3969 /* temporary hack into ksp private data structure */ 3970 #include <petsc/private/kspimpl.h> 3971 3972 #undef __FUNCT__ 3973 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3974 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3975 { 3976 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3977 PC_IS *pcis = (PC_IS*)pc->data; 3978 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3979 MatNullSpace CoarseNullSpace=NULL; 3980 ISLocalToGlobalMapping coarse_islg; 3981 IS coarse_is,*isarray; 3982 PetscInt i,im_active=-1,active_procs=-1; 3983 PetscInt nis,nisdofs,nisneu; 3984 PC pc_temp; 3985 PCType coarse_pc_type; 3986 KSPType coarse_ksp_type; 3987 PetscBool multilevel_requested,multilevel_allowed; 3988 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3989 Mat t_coarse_mat_is; 3990 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3991 PetscMPIInt all_procs; 3992 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3993 PetscBool compute_vecs = PETSC_FALSE; 3994 PetscScalar *array; 3995 PetscErrorCode ierr; 3996 3997 PetscFunctionBegin; 3998 /* Assign global numbering to coarse dofs */ 3999 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4000 PetscInt ocoarse_size; 4001 compute_vecs = PETSC_TRUE; 4002 ocoarse_size = pcbddc->coarse_size; 4003 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4004 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4005 /* see if we can avoid some work */ 4006 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4007 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4008 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4009 PC pc; 4010 PetscBool isbddc; 4011 4012 /* temporary workaround since PCBDDC does not have a reset method so far */ 4013 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4014 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4015 if (isbddc) { 4016 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4017 } 4018 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4019 coarse_reuse = PETSC_FALSE; 4020 } else { /* we can safely reuse already computed coarse matrix */ 4021 coarse_reuse = PETSC_TRUE; 4022 } 4023 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4024 coarse_reuse = PETSC_FALSE; 4025 } 4026 /* reset any subassembling information */ 4027 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4028 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4029 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4030 coarse_reuse = PETSC_TRUE; 4031 } 4032 4033 /* count "active" (i.e. with positive local size) and "void" processes */ 4034 im_active = !!(pcis->n); 4035 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4036 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4037 void_procs = all_procs-active_procs; 4038 csin_type_simple = PETSC_TRUE; 4039 redist = PETSC_FALSE; 4040 if (pcbddc->current_level && void_procs) { 4041 csin_ml = PETSC_TRUE; 4042 ncoarse_ml = void_procs; 4043 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4044 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4045 csin_ds = PETSC_TRUE; 4046 ncoarse_ds = pcbddc->redistribute_coarse; 4047 redist = PETSC_TRUE; 4048 } else { 4049 csin_ds = PETSC_TRUE; 4050 ncoarse_ds = active_procs; 4051 redist = PETSC_TRUE; 4052 } 4053 } else { 4054 csin_ml = PETSC_FALSE; 4055 ncoarse_ml = all_procs; 4056 if (void_procs) { 4057 csin_ds = PETSC_TRUE; 4058 ncoarse_ds = void_procs; 4059 csin_type_simple = PETSC_FALSE; 4060 } else { 4061 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4062 csin_ds = PETSC_TRUE; 4063 ncoarse_ds = pcbddc->redistribute_coarse; 4064 redist = PETSC_TRUE; 4065 } else { 4066 csin_ds = PETSC_FALSE; 4067 ncoarse_ds = all_procs; 4068 } 4069 } 4070 } 4071 4072 /* 4073 test if we can go multilevel: three conditions must be satisfied: 4074 - we have not exceeded the number of levels requested 4075 - we can actually subassemble the active processes 4076 - we can find a suitable number of MPI processes where we can place the subassembled problem 4077 */ 4078 multilevel_allowed = PETSC_FALSE; 4079 multilevel_requested = PETSC_FALSE; 4080 if (pcbddc->current_level < pcbddc->max_levels) { 4081 multilevel_requested = PETSC_TRUE; 4082 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4083 multilevel_allowed = PETSC_FALSE; 4084 } else { 4085 multilevel_allowed = PETSC_TRUE; 4086 } 4087 } 4088 /* determine number of process partecipating to coarse solver */ 4089 if (multilevel_allowed) { 4090 ncoarse = ncoarse_ml; 4091 csin = csin_ml; 4092 redist = PETSC_FALSE; 4093 } else { 4094 ncoarse = ncoarse_ds; 4095 csin = csin_ds; 4096 } 4097 4098 /* creates temporary l2gmap and IS for coarse indexes */ 4099 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4100 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4101 4102 /* creates temporary MATIS object for coarse matrix */ 4103 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4104 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4105 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4106 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4107 #if 0 4108 { 4109 PetscViewer viewer; 4110 char filename[256]; 4111 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4112 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4113 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4114 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4115 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 4116 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4117 } 4118 #endif 4119 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4120 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4121 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4122 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4123 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4124 4125 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4126 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4127 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4128 const PetscInt *idxs; 4129 ISLocalToGlobalMapping tmap; 4130 4131 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4132 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4133 /* allocate space for temporary storage */ 4134 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4135 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4136 /* allocate for IS array */ 4137 nisdofs = pcbddc->n_ISForDofsLocal; 4138 nisneu = !!pcbddc->NeumannBoundariesLocal; 4139 nis = nisdofs + nisneu; 4140 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4141 /* dofs splitting */ 4142 for (i=0;i<nisdofs;i++) { 4143 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4144 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4145 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4146 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4147 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4148 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4149 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4150 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4151 } 4152 /* neumann boundaries */ 4153 if (pcbddc->NeumannBoundariesLocal) { 4154 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4155 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4156 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4157 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4158 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4159 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4160 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4161 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4162 } 4163 /* free memory */ 4164 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4165 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4166 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4167 } else { 4168 nis = 0; 4169 nisdofs = 0; 4170 nisneu = 0; 4171 isarray = NULL; 4172 } 4173 /* destroy no longer needed map */ 4174 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4175 4176 /* restrict on coarse candidates (if needed) */ 4177 coarse_mat_is = NULL; 4178 if (csin) { 4179 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4180 if (redist) { 4181 PetscMPIInt rank; 4182 PetscInt spc,n_spc_p1,dest[1],destsize; 4183 4184 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4185 spc = active_procs/ncoarse; 4186 n_spc_p1 = active_procs%ncoarse; 4187 if (im_active) { 4188 destsize = 1; 4189 if (rank > n_spc_p1*(spc+1)-1) { 4190 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4191 } else { 4192 dest[0] = rank/(spc+1); 4193 } 4194 } else { 4195 destsize = 0; 4196 } 4197 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4198 } else if (csin_type_simple) { 4199 PetscMPIInt rank; 4200 PetscInt issize,isidx; 4201 4202 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4203 if (im_active) { 4204 issize = 1; 4205 isidx = (PetscInt)rank; 4206 } else { 4207 issize = 0; 4208 isidx = -1; 4209 } 4210 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4211 } else { /* get a suitable subassembling pattern from MATIS code */ 4212 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4213 } 4214 4215 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4216 if (!redist || ncoarse <= void_procs) { 4217 PetscInt ncoarse_cand,tissize,*nisindices; 4218 PetscInt *coarse_candidates; 4219 const PetscInt* tisindices; 4220 4221 /* get coarse candidates' ranks in pc communicator */ 4222 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4223 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4224 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4225 if (!coarse_candidates[i]) { 4226 coarse_candidates[ncoarse_cand++]=i; 4227 } 4228 } 4229 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4230 4231 4232 if (pcbddc->dbg_flag) { 4233 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4234 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4235 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4236 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4237 for (i=0;i<ncoarse_cand;i++) { 4238 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4239 } 4240 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4241 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4242 } 4243 /* shift the pattern on coarse candidates */ 4244 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4245 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4246 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4247 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4248 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4249 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4250 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4251 } 4252 if (pcbddc->dbg_flag) { 4253 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4254 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4255 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4256 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4257 } 4258 } 4259 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4260 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4261 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4262 } else { /* this is the last level, so use just receiving processes in subcomm */ 4263 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4264 } 4265 } else { 4266 if (pcbddc->dbg_flag) { 4267 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4268 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4269 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4270 } 4271 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4272 coarse_mat_is = t_coarse_mat_is; 4273 } 4274 4275 /* create local to global scatters for coarse problem */ 4276 if (compute_vecs) { 4277 PetscInt lrows; 4278 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4279 if (coarse_mat_is) { 4280 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4281 } else { 4282 lrows = 0; 4283 } 4284 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4285 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4286 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4287 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4288 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4289 } 4290 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4291 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4292 4293 /* set defaults for coarse KSP and PC */ 4294 if (multilevel_allowed) { 4295 coarse_ksp_type = KSPRICHARDSON; 4296 coarse_pc_type = PCBDDC; 4297 } else { 4298 coarse_ksp_type = KSPPREONLY; 4299 coarse_pc_type = PCREDUNDANT; 4300 } 4301 4302 /* print some info if requested */ 4303 if (pcbddc->dbg_flag) { 4304 if (!multilevel_allowed) { 4305 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4306 if (multilevel_requested) { 4307 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4308 } else if (pcbddc->max_levels) { 4309 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4310 } 4311 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4312 } 4313 } 4314 4315 /* create the coarse KSP object only once with defaults */ 4316 if (coarse_mat_is) { 4317 MatReuse coarse_mat_reuse; 4318 PetscViewer dbg_viewer = NULL; 4319 if (pcbddc->dbg_flag) { 4320 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4321 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4322 } 4323 if (!pcbddc->coarse_ksp) { 4324 char prefix[256],str_level[16]; 4325 size_t len; 4326 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4327 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4328 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4329 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4330 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4331 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4332 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4333 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4334 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4335 /* prefix */ 4336 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4337 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4338 if (!pcbddc->current_level) { 4339 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4340 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4341 } else { 4342 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4343 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4344 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4345 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4346 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4347 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4348 } 4349 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4350 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4351 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4352 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4353 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4354 /* allow user customization */ 4355 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4356 } 4357 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4358 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4359 if (nisdofs) { 4360 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4361 for (i=0;i<nisdofs;i++) { 4362 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4363 } 4364 } 4365 if (nisneu) { 4366 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4367 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4368 } 4369 4370 /* get some info after set from options */ 4371 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4372 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4373 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4374 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4375 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4376 isbddc = PETSC_FALSE; 4377 } 4378 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4379 if (isredundant) { 4380 KSP inner_ksp; 4381 PC inner_pc; 4382 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4383 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4384 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4385 } 4386 4387 /* assemble coarse matrix */ 4388 if (coarse_reuse) { 4389 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4390 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4391 coarse_mat_reuse = MAT_REUSE_MATRIX; 4392 } else { 4393 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4394 } 4395 if (isbddc || isnn) { 4396 if (pcbddc->coarsening_ratio > 1) { 4397 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4398 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4399 if (pcbddc->dbg_flag) { 4400 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4401 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4402 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4403 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4404 } 4405 } 4406 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4407 } else { 4408 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4409 coarse_mat = coarse_mat_is; 4410 } 4411 } else { 4412 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4413 } 4414 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4415 4416 /* propagate symmetry info of coarse matrix */ 4417 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4418 if (pc->pmat->symmetric_set) { 4419 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4420 } 4421 if (pc->pmat->hermitian_set) { 4422 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4423 } 4424 if (pc->pmat->spd_set) { 4425 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4426 } 4427 /* set operators */ 4428 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4429 if (pcbddc->dbg_flag) { 4430 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4431 } 4432 } else { /* processes non partecipating to coarse solver (if any) */ 4433 coarse_mat = 0; 4434 } 4435 ierr = PetscFree(isarray);CHKERRQ(ierr); 4436 #if 0 4437 { 4438 PetscViewer viewer; 4439 char filename[256]; 4440 sprintf(filename,"coarse_mat.m"); 4441 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4442 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4443 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4444 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 4445 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4446 } 4447 #endif 4448 4449 /* Compute coarse null space (special handling by BDDC only) */ 4450 #if 0 4451 if (pcbddc->NullSpace) { 4452 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4453 } 4454 #endif 4455 4456 if (pcbddc->coarse_ksp) { 4457 Vec crhs,csol; 4458 PetscBool ispreonly; 4459 4460 if (CoarseNullSpace) { 4461 if (isbddc) { 4462 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4463 } else { 4464 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4465 } 4466 } 4467 /* setup coarse ksp */ 4468 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4469 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4470 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4471 /* hack */ 4472 if (!csol) { 4473 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4474 } 4475 if (!crhs) { 4476 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4477 } 4478 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4479 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4480 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4481 KSP check_ksp; 4482 KSPType check_ksp_type; 4483 PC check_pc; 4484 Vec check_vec,coarse_vec; 4485 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4486 PetscInt its; 4487 PetscBool compute_eigs; 4488 PetscReal *eigs_r,*eigs_c; 4489 PetscInt neigs; 4490 const char *prefix; 4491 4492 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4493 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4494 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4495 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4496 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4497 if (ispreonly) { 4498 check_ksp_type = KSPPREONLY; 4499 compute_eigs = PETSC_FALSE; 4500 } else { 4501 check_ksp_type = KSPGMRES; 4502 compute_eigs = PETSC_TRUE; 4503 } 4504 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4505 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4506 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4507 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4508 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4509 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4510 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4511 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4512 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4513 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4514 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4515 /* create random vec */ 4516 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4517 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4518 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4519 if (CoarseNullSpace) { 4520 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4521 } 4522 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4523 /* solve coarse problem */ 4524 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4525 if (CoarseNullSpace) { 4526 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4527 } 4528 /* set eigenvalue estimation if preonly has not been requested */ 4529 if (compute_eigs) { 4530 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4531 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4532 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4533 lambda_max = eigs_r[neigs-1]; 4534 lambda_min = eigs_r[0]; 4535 if (pcbddc->use_coarse_estimates) { 4536 if (lambda_max>lambda_min) { 4537 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4538 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4539 } 4540 } 4541 } 4542 4543 /* check coarse problem residual error */ 4544 if (pcbddc->dbg_flag) { 4545 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4546 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4547 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4548 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4549 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4550 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4551 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4552 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4553 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4554 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4555 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4556 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4557 if (compute_eigs) { 4558 PetscReal lambda_max_s,lambda_min_s; 4559 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4560 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4561 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4562 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4563 for (i=0;i<neigs;i++) { 4564 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4565 } 4566 } 4567 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4568 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4569 } 4570 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4571 if (compute_eigs) { 4572 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4573 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4574 } 4575 } 4576 } 4577 /* print additional info */ 4578 if (pcbddc->dbg_flag) { 4579 /* waits until all processes reaches this point */ 4580 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4581 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4582 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4583 } 4584 4585 /* free memory */ 4586 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4587 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4588 PetscFunctionReturn(0); 4589 } 4590 4591 #undef __FUNCT__ 4592 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4593 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4594 { 4595 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4596 PC_IS* pcis = (PC_IS*)pc->data; 4597 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4598 IS subset,subset_mult,subset_n; 4599 PetscInt local_size,coarse_size=0; 4600 PetscInt *local_primal_indices=NULL; 4601 const PetscInt *t_local_primal_indices; 4602 PetscErrorCode ierr; 4603 4604 PetscFunctionBegin; 4605 /* Compute global number of coarse dofs */ 4606 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4607 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4608 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4609 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4610 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4611 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4612 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4613 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4614 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4615 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 4616 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4617 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4618 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4619 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4620 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4621 4622 /* check numbering */ 4623 if (pcbddc->dbg_flag) { 4624 PetscScalar coarsesum,*array; 4625 PetscInt i; 4626 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4627 4628 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4629 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4630 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4631 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4632 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4633 for (i=0;i<pcbddc->local_primal_size;i++) { 4634 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4635 } 4636 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4637 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4638 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4639 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4640 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4641 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4642 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4643 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4644 for (i=0;i<pcis->n;i++) { 4645 if (array[i] == 1.0) { 4646 set_error = PETSC_TRUE; 4647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4648 } 4649 } 4650 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4651 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4652 for (i=0;i<pcis->n;i++) { 4653 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4654 } 4655 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4656 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4657 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4658 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4659 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4660 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4661 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4662 PetscInt *gidxs; 4663 4664 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4665 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4666 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4667 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4668 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4669 for (i=0;i<pcbddc->local_primal_size;i++) { 4670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4671 } 4672 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4673 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4674 } 4675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4676 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4677 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4678 } 4679 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4680 /* get back data */ 4681 *coarse_size_n = coarse_size; 4682 *local_primal_indices_n = local_primal_indices; 4683 PetscFunctionReturn(0); 4684 } 4685 4686 #undef __FUNCT__ 4687 #define __FUNCT__ "PCBDDCGlobalToLocal" 4688 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4689 { 4690 IS localis_t; 4691 PetscInt i,lsize,*idxs,n; 4692 PetscScalar *vals; 4693 PetscErrorCode ierr; 4694 4695 PetscFunctionBegin; 4696 /* get indices in local ordering exploiting local to global map */ 4697 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4698 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4699 for (i=0;i<lsize;i++) vals[i] = 1.0; 4700 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4701 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4702 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4703 if (idxs) { /* multilevel guard */ 4704 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4705 } 4706 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4707 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4708 ierr = PetscFree(vals);CHKERRQ(ierr); 4709 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4710 /* now compute set in local ordering */ 4711 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4712 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4713 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4714 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4715 for (i=0,lsize=0;i<n;i++) { 4716 if (PetscRealPart(vals[i]) > 0.5) { 4717 lsize++; 4718 } 4719 } 4720 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4721 for (i=0,lsize=0;i<n;i++) { 4722 if (PetscRealPart(vals[i]) > 0.5) { 4723 idxs[lsize++] = i; 4724 } 4725 } 4726 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4727 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4728 *localis = localis_t; 4729 PetscFunctionReturn(0); 4730 } 4731 4732 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4733 #undef __FUNCT__ 4734 #define __FUNCT__ "PCBDDCMatMult_Private" 4735 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4736 { 4737 PCBDDCChange_ctx change_ctx; 4738 PetscErrorCode ierr; 4739 4740 PetscFunctionBegin; 4741 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4742 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4743 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4744 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4745 PetscFunctionReturn(0); 4746 } 4747 4748 #undef __FUNCT__ 4749 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4750 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4751 { 4752 PCBDDCChange_ctx change_ctx; 4753 PetscErrorCode ierr; 4754 4755 PetscFunctionBegin; 4756 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4757 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4758 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4759 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4760 PetscFunctionReturn(0); 4761 } 4762 4763 #undef __FUNCT__ 4764 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4765 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4766 { 4767 PC_IS *pcis=(PC_IS*)pc->data; 4768 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4769 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4770 Mat S_j; 4771 PetscInt *used_xadj,*used_adjncy; 4772 PetscBool free_used_adj; 4773 PetscErrorCode ierr; 4774 4775 PetscFunctionBegin; 4776 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4777 free_used_adj = PETSC_FALSE; 4778 if (pcbddc->sub_schurs_layers == -1) { 4779 used_xadj = NULL; 4780 used_adjncy = NULL; 4781 } else { 4782 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4783 used_xadj = pcbddc->mat_graph->xadj; 4784 used_adjncy = pcbddc->mat_graph->adjncy; 4785 } else if (pcbddc->computed_rowadj) { 4786 used_xadj = pcbddc->mat_graph->xadj; 4787 used_adjncy = pcbddc->mat_graph->adjncy; 4788 } else { 4789 PetscBool flg_row=PETSC_FALSE; 4790 const PetscInt *xadj,*adjncy; 4791 PetscInt nvtxs; 4792 4793 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4794 if (flg_row) { 4795 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4796 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4797 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4798 free_used_adj = PETSC_TRUE; 4799 } else { 4800 pcbddc->sub_schurs_layers = -1; 4801 used_xadj = NULL; 4802 used_adjncy = NULL; 4803 } 4804 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4805 } 4806 } 4807 4808 /* setup sub_schurs data */ 4809 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4810 if (!sub_schurs->use_mumps) { 4811 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4812 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4813 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4814 } else { 4815 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4816 PetscBool isseqaij; 4817 if (!pcbddc->use_vertices && reuse_solvers) { 4818 PetscInt n_vertices; 4819 4820 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4821 reuse_solvers = (PetscBool)!n_vertices; 4822 } 4823 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4824 if (!isseqaij) { 4825 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4826 if (matis->A == pcbddc->local_mat) { 4827 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4828 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4829 } else { 4830 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4831 } 4832 } 4833 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4834 } 4835 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4836 4837 /* free adjacency */ 4838 if (free_used_adj) { 4839 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4840 } 4841 PetscFunctionReturn(0); 4842 } 4843 4844 #undef __FUNCT__ 4845 #define __FUNCT__ "PCBDDCInitSubSchurs" 4846 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4847 { 4848 PC_IS *pcis=(PC_IS*)pc->data; 4849 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4850 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4851 PCBDDCGraph graph; 4852 PetscErrorCode ierr; 4853 4854 PetscFunctionBegin; 4855 /* attach interface graph for determining subsets */ 4856 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4857 IS verticesIS,verticescomm; 4858 PetscInt vsize,*idxs; 4859 4860 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4861 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4862 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4863 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4864 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4865 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4866 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4867 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4868 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4869 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4870 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4871 /* 4872 if (pcbddc->dbg_flag) { 4873 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4874 } 4875 */ 4876 } else { 4877 graph = pcbddc->mat_graph; 4878 } 4879 4880 /* sub_schurs init */ 4881 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4882 4883 /* free graph struct */ 4884 if (pcbddc->sub_schurs_rebuild) { 4885 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4886 } 4887 PetscFunctionReturn(0); 4888 } 4889