1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 29 30 if (pcbddc->dbg_flag) { 31 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 32 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 33 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 35 } 36 37 if (pcbddc->dbg_flag) { 38 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 39 } 40 41 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 42 43 /* max size of subsets */ 44 mss = 0; 45 for (i=0;i<sub_schurs->n_subs;i++) { 46 PetscInt subset_size; 47 48 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 49 mss = PetscMax(mss,subset_size); 50 } 51 52 /* min/max and threshold */ 53 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 54 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 55 nmax = PetscMax(nmin,nmax); 56 allocated_S_St = PETSC_FALSE; 57 if (nmin) { 58 allocated_S_St = PETSC_TRUE; 59 } 60 61 /* allocate lapack workspace */ 62 cum = cum2 = 0; 63 maxneigs = 0; 64 for (i=0;i<sub_schurs->n_subs;i++) { 65 PetscInt n,subset_size; 66 67 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 68 n = PetscMin(subset_size,nmax); 69 cum += subset_size; 70 cum2 += subset_size*n; 71 maxneigs = PetscMax(maxneigs,n); 72 } 73 if (mss) { 74 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 75 PetscBLASInt B_itype = 1; 76 PetscBLASInt B_N = mss; 77 PetscReal zero = 0.0; 78 PetscReal eps = 0.0; /* dlamch? */ 79 80 B_lwork = -1; 81 S = NULL; 82 St = NULL; 83 eigs = NULL; 84 eigv = NULL; 85 B_iwork = NULL; 86 B_ifail = NULL; 87 #if defined(PETSC_USE_COMPLEX) 88 rwork = NULL; 89 #endif 90 thresh = 1.0; 91 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 92 #if defined(PETSC_USE_COMPLEX) 93 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 94 #else 95 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 96 #endif 97 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 98 ierr = PetscFPTrapPop();CHKERRQ(ierr); 99 } else { 100 /* TODO */ 101 } 102 } else { 103 lwork = 0; 104 } 105 106 nv = 0; 107 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 108 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 109 } 110 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 111 if (allocated_S_St) { 112 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 113 } 114 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 115 #if defined(PETSC_USE_COMPLEX) 116 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 117 #endif 118 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 119 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 121 nv+cum,&pcbddc->adaptive_constraints_idxs, 122 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 123 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 124 125 maxneigs = 0; 126 cum = cum2 = cumarray = 0; 127 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 128 pcbddc->adaptive_constraints_data_ptr[0] = 0; 129 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 130 const PetscInt *idxs; 131 132 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 133 for (cum=0;cum<nv;cum++) { 134 pcbddc->adaptive_constraints_n[cum] = 1; 135 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 136 pcbddc->adaptive_constraints_data[cum] = 1.0; 137 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 138 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 139 } 140 cum2 = cum; 141 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 142 } 143 144 if (mss) { /* multilevel */ 145 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 146 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 147 } 148 149 for (i=0;i<sub_schurs->n_subs;i++) { 150 151 const PetscInt *idxs; 152 PetscReal infty = PETSC_MAX_REAL; 153 PetscInt j,subset_size,eigs_start = 0; 154 PetscBLASInt B_N; 155 PetscBool same_data = PETSC_FALSE; 156 157 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 158 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 159 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 160 if (sub_schurs->is_hermitian) { 161 PetscInt j,k; 162 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 163 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 } 166 for (j=0;j<subset_size;j++) { 167 for (k=j;k<subset_size;k++) { 168 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 169 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 170 } 171 } 172 } else { 173 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 174 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 } 176 } else { 177 S = Sarray + cumarray; 178 St = Starray + cumarray; 179 } 180 181 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 182 /* see if we can save some work */ 183 if (sub_schurs->n_subs == 1) { 184 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 185 } 186 187 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 188 B_neigs = 0; 189 } else { 190 /* Threshold: this is an heuristic for edges */ 191 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 192 193 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 194 PetscBLASInt B_itype = 1; 195 PetscBLASInt B_IL, B_IU; 196 PetscReal eps = -1.0; /* dlamch? */ 197 PetscInt nmin_s; 198 199 /* ask for eigenvalues larger than thresh */ 200 if (pcbddc->dbg_flag) { 201 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 202 } 203 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 204 #if defined(PETSC_USE_COMPLEX) 205 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 206 #else 207 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 208 #endif 209 ierr = PetscFPTrapPop();CHKERRQ(ierr); 210 if (B_ierr) { 211 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 212 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 213 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 214 } 215 216 if (B_neigs > nmax) { 217 if (pcbddc->dbg_flag) { 218 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 219 } 220 eigs_start = B_neigs -nmax; 221 B_neigs = nmax; 222 } 223 224 nmin_s = PetscMin(nmin,B_N); 225 if (B_neigs < nmin_s) { 226 PetscBLASInt B_neigs2; 227 228 B_IU = B_N - B_neigs; 229 B_IL = B_N - nmin_s + 1; 230 if (pcbddc->dbg_flag) { 231 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 232 } 233 if (sub_schurs->is_hermitian) { 234 PetscInt j; 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 for (j=0;j<subset_size;j++) { 239 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 240 } 241 } else { 242 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 244 } 245 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 246 #if defined(PETSC_USE_COMPLEX) 247 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 248 #else 249 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 250 #endif 251 ierr = PetscFPTrapPop();CHKERRQ(ierr); 252 B_neigs += B_neigs2; 253 } 254 if (B_ierr) { 255 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 256 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 257 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 if (pcbddc->dbg_flag) { 260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 261 for (j=0;j<B_neigs;j++) { 262 if (eigs[j] == 0.0) { 263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 264 } else { 265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 266 } 267 } 268 } 269 } else { 270 /* TODO */ 271 } 272 } 273 maxneigs = PetscMax(B_neigs,maxneigs); 274 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 275 if (B_neigs) { 276 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 277 278 if (pcbddc->dbg_flag > 1) { 279 PetscInt ii; 280 for (ii=0;ii<B_neigs;ii++) { 281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 282 for (j=0;j<B_N;j++) { 283 #if defined(PETSC_USE_COMPLEX) 284 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 287 #else 288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 289 #endif 290 } 291 } 292 } 293 #if 0 294 for (j=0;j<B_neigs;j++) { 295 PetscBLASInt Blas_N,Blas_one = 1.0; 296 PetscScalar norm; 297 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 298 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 299 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 300 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 301 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 302 } else { 303 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 304 } 305 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 306 } 307 #endif 308 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 309 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 310 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 311 cum++; 312 } 313 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 314 /* shift for next computation */ 315 cumarray += subset_size*subset_size; 316 } 317 if (pcbddc->dbg_flag) { 318 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 319 } 320 321 if (mss) { 322 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 323 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 324 /* destroy matrices (junk) */ 325 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 326 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 327 } 328 if (allocated_S_St) { 329 ierr = PetscFree2(S,St);CHKERRQ(ierr); 330 } 331 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 332 #if defined(PETSC_USE_COMPLEX) 333 ierr = PetscFree(rwork);CHKERRQ(ierr); 334 #endif 335 if (pcbddc->dbg_flag) { 336 PetscInt maxneigs_r; 337 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 338 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 339 } 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "PCBDDCSetUpSolvers" 345 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 346 { 347 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 348 PetscScalar *coarse_submat_vals; 349 PetscErrorCode ierr; 350 351 PetscFunctionBegin; 352 /* Setup local scatters R_to_B and (optionally) R_to_D */ 353 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 354 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 355 356 /* Setup local neumann solver ksp_R */ 357 /* PCBDDCSetUpLocalScatters should be called first! */ 358 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 359 360 /* Change global null space passed in by the user if change of basis has been requested */ 361 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 362 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 363 } 364 365 /* 366 Setup local correction and local part of coarse basis. 367 Gives back the dense local part of the coarse matrix in column major ordering 368 */ 369 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 370 371 /* Compute total number of coarse nodes and setup coarse solver */ 372 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 373 374 /* free */ 375 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 376 PetscFunctionReturn(0); 377 } 378 379 #undef __FUNCT__ 380 #define __FUNCT__ "PCBDDCResetCustomization" 381 PetscErrorCode PCBDDCResetCustomization(PC pc) 382 { 383 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 384 PetscErrorCode ierr; 385 386 PetscFunctionBegin; 387 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 390 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 392 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 393 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 394 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 395 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 396 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 397 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 #undef __FUNCT__ 402 #define __FUNCT__ "PCBDDCResetTopography" 403 PetscErrorCode PCBDDCResetTopography(PC pc) 404 { 405 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 406 PetscErrorCode ierr; 407 408 PetscFunctionBegin; 409 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 411 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 412 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 413 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "PCBDDCResetSolvers" 419 PetscErrorCode PCBDDCResetSolvers(PC pc) 420 { 421 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 422 PetscScalar *array; 423 PetscErrorCode ierr; 424 425 PetscFunctionBegin; 426 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 427 if (pcbddc->coarse_phi_B) { 428 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 429 ierr = PetscFree(array);CHKERRQ(ierr); 430 } 431 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 432 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 434 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 436 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 437 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 438 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 440 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 441 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 442 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 443 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 444 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 445 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 446 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 447 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 448 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 449 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 450 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 451 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 452 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 459 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 460 { 461 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 462 PC_IS *pcis = (PC_IS*)pc->data; 463 VecType impVecType; 464 PetscInt n_constraints,n_R,old_size; 465 PetscErrorCode ierr; 466 467 PetscFunctionBegin; 468 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 469 /* get sizes */ 470 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 471 n_R = pcis->n-pcbddc->n_vertices; 472 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 473 /* local work vectors (try to avoid unneeded work)*/ 474 /* R nodes */ 475 old_size = -1; 476 if (pcbddc->vec1_R) { 477 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 478 } 479 if (n_R != old_size) { 480 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 481 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 482 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 483 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 484 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 485 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 486 } 487 /* local primal dofs */ 488 old_size = -1; 489 if (pcbddc->vec1_P) { 490 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 491 } 492 if (pcbddc->local_primal_size != old_size) { 493 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 494 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 495 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 496 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 497 } 498 /* local explicit constraints */ 499 old_size = -1; 500 if (pcbddc->vec1_C) { 501 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 502 } 503 if (n_constraints && n_constraints != old_size) { 504 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 505 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 506 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 507 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 508 } 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "PCBDDCSetUpCorrection" 514 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 515 { 516 PetscErrorCode ierr; 517 /* pointers to pcis and pcbddc */ 518 PC_IS* pcis = (PC_IS*)pc->data; 519 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 520 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 521 /* submatrices of local problem */ 522 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 523 /* submatrices of local coarse problem */ 524 Mat S_VV,S_CV,S_VC,S_CC; 525 /* working matrices */ 526 Mat C_CR; 527 /* additional working stuff */ 528 PC pc_R; 529 Mat F; 530 PetscBool isLU,isCHOL,isILU; 531 532 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 533 PetscScalar *work; 534 PetscInt *idx_V_B; 535 PetscInt n,n_vertices,n_constraints; 536 PetscInt i,n_R,n_D,n_B; 537 PetscBool unsymmetric_check; 538 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 539 MatType impMatType; 540 /* some shortcuts to scalars */ 541 PetscScalar one=1.0,m_one=-1.0; 542 543 PetscFunctionBegin; 544 n_vertices = pcbddc->n_vertices; 545 n_constraints = pcbddc->local_primal_size-n_vertices; 546 /* Set Non-overlapping dimensions */ 547 n_B = pcis->n_B; 548 n_D = pcis->n - n_B; 549 n_R = pcis->n - n_vertices; 550 551 /* Set types for local objects needed by BDDC precondtioner */ 552 impMatType = MATSEQDENSE; 553 554 /* vertices in boundary numbering */ 555 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 556 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 557 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 558 559 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 560 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 564 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 566 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 568 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 569 570 unsymmetric_check = PETSC_FALSE; 571 /* allocate workspace */ 572 n = 0; 573 if (n_constraints) { 574 n += n_R*n_constraints; 575 } 576 if (n_vertices) { 577 n = PetscMax(2*n_R*n_vertices,n); 578 n = PetscMax((n_R+n_B)*n_vertices,n); 579 } 580 if (!pcbddc->symmetric_primal) { 581 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 582 unsymmetric_check = PETSC_TRUE; 583 } 584 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 585 586 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 587 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 590 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 591 if (isLU || isILU || isCHOL) { 592 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 593 } else if (sub_schurs->reuse_mumps) { 594 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 595 MatFactorType type; 596 597 F = reuse_mumps->F; 598 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 599 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 600 } else { 601 F = NULL; 602 } 603 604 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 605 if (n_constraints) { 606 Mat M1,M2,M3; 607 Mat auxmat; 608 IS is_aux; 609 PetscScalar *array,*array2; 610 611 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 612 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 613 614 /* Extract constraints on R nodes: C_{CR} */ 615 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 616 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 617 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 618 619 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 620 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 621 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 622 for (i=0;i<n_constraints;i++) { 623 const PetscScalar *row_cmat_values; 624 const PetscInt *row_cmat_indices; 625 PetscInt size_of_constraint,j; 626 627 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 628 for (j=0;j<size_of_constraint;j++) { 629 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 630 } 631 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 632 } 633 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 634 if (F) { 635 Mat B; 636 637 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 638 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 639 ierr = MatDestroy(&B);CHKERRQ(ierr); 640 } else { 641 PetscScalar *marr; 642 643 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 644 for (i=0;i<n_constraints;i++) { 645 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 646 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 647 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 648 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 649 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 650 } 651 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 652 } 653 if (!pcbddc->switch_static) { 654 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 655 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 656 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 660 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 661 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 662 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 663 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 664 } 665 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 666 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 667 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 668 } else { 669 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 670 pcbddc->local_auxmat2 = local_auxmat2_R; 671 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 672 } 673 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 674 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 675 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 676 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 677 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 678 if (isCHOL) { 679 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 680 } else { 681 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 682 } 683 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 684 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 685 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 686 ierr = MatDestroy(&M2);CHKERRQ(ierr); 687 ierr = MatDestroy(&M3);CHKERRQ(ierr); 688 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 689 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 690 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 691 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 692 ierr = MatDestroy(&M1);CHKERRQ(ierr); 693 } 694 /* Get submatrices from subdomain matrix */ 695 if (n_vertices) { 696 IS is_aux; 697 698 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 699 IS tis; 700 701 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 702 ierr = ISSort(tis);CHKERRQ(ierr); 703 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 704 ierr = ISDestroy(&tis);CHKERRQ(ierr); 705 } else { 706 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 707 } 708 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 709 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 710 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 711 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 712 } 713 714 /* Matrix of coarse basis functions (local) */ 715 if (pcbddc->coarse_phi_B) { 716 PetscInt on_B,on_primal,on_D=n_D; 717 if (pcbddc->coarse_phi_D) { 718 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 719 } 720 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 721 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 722 PetscScalar *marray; 723 724 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 725 ierr = PetscFree(marray);CHKERRQ(ierr); 726 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 727 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 728 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 729 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 730 } 731 } 732 733 if (!pcbddc->coarse_phi_B) { 734 PetscScalar *marray; 735 736 n = n_B*pcbddc->local_primal_size; 737 if (pcbddc->switch_static || pcbddc->dbg_flag) { 738 n += n_D*pcbddc->local_primal_size; 739 } 740 if (!pcbddc->symmetric_primal) { 741 n *= 2; 742 } 743 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 745 n = n_B*pcbddc->local_primal_size; 746 if (pcbddc->switch_static || pcbddc->dbg_flag) { 747 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 748 n += n_D*pcbddc->local_primal_size; 749 } 750 if (!pcbddc->symmetric_primal) { 751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 752 if (pcbddc->switch_static || pcbddc->dbg_flag) { 753 n = n_B*pcbddc->local_primal_size; 754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 755 } 756 } else { 757 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 758 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 760 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 761 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 762 } 763 } 764 } 765 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 766 /* vertices */ 767 if (n_vertices) { 768 769 ierr = MatConvert(A_VV,impMatType,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 770 771 if (n_R) { 772 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 773 PetscBLASInt B_N,B_one = 1; 774 PetscScalar *x,*y; 775 PetscBool isseqaij; 776 777 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 778 ierr = MatConvert(A_RV,impMatType,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 780 if (F) { /* TODO could be optimized for symmetric problems */ 781 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 782 } else { 783 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 784 for (i=0;i<n_vertices;i++) { 785 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 786 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 787 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 788 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 789 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 790 } 791 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 792 } 793 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 794 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 795 if (n_constraints) { 796 Mat B; 797 798 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 799 for (i=0;i<n_vertices;i++) { 800 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 801 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 802 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 803 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 804 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 805 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 806 } 807 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 808 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 809 ierr = MatDestroy(&B);CHKERRQ(ierr); 810 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 811 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 812 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 813 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 814 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 815 ierr = MatDestroy(&B);CHKERRQ(ierr); 816 } 817 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 818 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 819 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 820 } 821 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 822 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 823 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 824 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 825 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 826 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 827 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 828 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 829 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 830 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 831 } else { 832 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 833 } 834 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 835 /* coarse basis functions */ 836 for (i=0;i<n_vertices;i++) { 837 PetscScalar *y; 838 839 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 840 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 841 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 842 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 y[n_B*i+idx_V_B[i]] = 1.0; 845 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 846 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 847 848 if (pcbddc->switch_static || pcbddc->dbg_flag) { 849 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 850 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 851 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 852 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 853 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 854 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 855 } 856 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 857 } 858 /* if n_R == 0 the object is not destroyed */ 859 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 860 } 861 862 if (n_constraints) { 863 Mat B; 864 865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 866 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 867 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 868 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 869 if (n_vertices) { 870 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 871 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 872 } else { 873 Mat S_VCt; 874 875 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 876 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 877 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 878 } 879 } 880 ierr = MatDestroy(&B);CHKERRQ(ierr); 881 /* coarse basis functions */ 882 for (i=0;i<n_constraints;i++) { 883 PetscScalar *y; 884 885 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 886 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 887 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 888 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 891 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 892 if (pcbddc->switch_static || pcbddc->dbg_flag) { 893 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 894 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 895 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 898 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 899 } 900 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 901 } 902 } 903 if (n_constraints) { 904 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 905 } 906 907 /* compute other basis functions for non-symmetric problems */ 908 if (!pcbddc->symmetric_primal) { 909 910 if (n_constraints) { 911 Mat S_CCT,B_C; 912 913 /* this is a lazy thing */ 914 ierr = MatConvert(C_CR,impMatType,MAT_INPLACE_MATRIX,&C_CR);CHKERRQ(ierr); 915 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 916 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 917 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 918 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 919 if (n_vertices) { 920 Mat B_V,S_VCT; 921 922 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 923 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 924 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 925 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 926 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 927 } 928 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 929 } else { /* if there are no constraints, reset work */ 930 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 931 } 932 if (n_vertices && n_R) { 933 Mat A_VRT; 934 PetscScalar *marray; 935 PetscBLASInt B_N,B_one = 1; 936 937 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 938 ierr = MatConvert(A_VRT,impMatType,MAT_INPLACE_MATRIX,&A_VRT);CHKERRQ(ierr); 939 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 940 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 941 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 942 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 943 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 944 } 945 946 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 947 for (i=0;i<pcbddc->local_primal_size;i++) { 948 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 949 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 950 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 951 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 952 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 953 } 954 } else { 955 for (i=0;i<pcbddc->local_primal_size;i++) { 956 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 957 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 958 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 959 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 960 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 961 } 962 } 963 /* coarse basis functions */ 964 for (i=0;i<pcbddc->local_primal_size;i++) { 965 PetscScalar *y; 966 967 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 968 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 969 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 970 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 971 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 972 if (i<n_vertices) { 973 y[n_B*i+idx_V_B[i]] = 1.0; 974 } 975 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 976 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 977 978 if (pcbddc->switch_static || pcbddc->dbg_flag) { 979 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 980 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 981 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 982 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 984 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 985 } 986 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 987 } 988 } 989 /* free memory */ 990 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 991 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 992 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 993 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 994 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 995 ierr = PetscFree(work);CHKERRQ(ierr); 996 if (n_vertices) { 997 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 998 } 999 if (n_constraints) { 1000 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1001 } 1002 /* Checking coarse_sub_mat and coarse basis functios */ 1003 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1004 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1005 if (pcbddc->dbg_flag) { 1006 Mat coarse_sub_mat; 1007 Mat AUXMAT,TM1,TM2,TM3,TM4; 1008 Mat coarse_phi_D,coarse_phi_B; 1009 Mat coarse_psi_D,coarse_psi_B; 1010 Mat A_II,A_BB,A_IB,A_BI; 1011 Mat C_B,CPHI; 1012 IS is_dummy; 1013 Vec mones; 1014 MatType checkmattype=MATSEQAIJ; 1015 PetscReal real_value; 1016 1017 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1018 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1019 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1020 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1021 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1022 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1023 if (unsymmetric_check) { 1024 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1025 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1026 } 1027 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1028 1029 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1030 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1032 if (unsymmetric_check) { 1033 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1034 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1035 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1036 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1037 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1038 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1039 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1040 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1041 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1042 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1043 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1044 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1045 } else { 1046 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1047 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1048 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1049 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1050 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1051 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1053 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1054 } 1055 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1056 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1057 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1058 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 1059 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1060 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1061 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1062 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1063 1064 /* check constraints */ 1065 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1066 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1067 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1068 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1069 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1070 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1071 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1073 if (unsymmetric_check) { 1074 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1075 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1076 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1077 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1079 } 1080 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1081 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1082 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1083 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1084 1085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1086 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1087 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1088 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1089 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1090 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1091 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1092 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1093 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1094 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1095 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1096 if (unsymmetric_check) { 1097 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1098 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1099 } 1100 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1101 } 1102 /* get back data */ 1103 *coarse_submat_vals_n = coarse_submat_vals; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1109 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1110 { 1111 Mat *work_mat; 1112 IS isrow_s,iscol_s; 1113 PetscBool rsorted,csorted; 1114 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1115 PetscErrorCode ierr; 1116 1117 PetscFunctionBegin; 1118 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1119 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1120 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1121 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1122 1123 if (!rsorted) { 1124 const PetscInt *idxs; 1125 PetscInt *idxs_sorted,i; 1126 1127 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1128 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1129 for (i=0;i<rsize;i++) { 1130 idxs_perm_r[i] = i; 1131 } 1132 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1133 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1134 for (i=0;i<rsize;i++) { 1135 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1136 } 1137 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1138 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1139 } else { 1140 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1141 isrow_s = isrow; 1142 } 1143 1144 if (!csorted) { 1145 if (isrow == iscol) { 1146 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1147 iscol_s = isrow_s; 1148 } else { 1149 const PetscInt *idxs; 1150 PetscInt *idxs_sorted,i; 1151 1152 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1153 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1154 for (i=0;i<csize;i++) { 1155 idxs_perm_c[i] = i; 1156 } 1157 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1158 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1159 for (i=0;i<csize;i++) { 1160 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1161 } 1162 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1163 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1164 } 1165 } else { 1166 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1167 iscol_s = iscol; 1168 } 1169 1170 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1171 1172 if (!rsorted || !csorted) { 1173 Mat new_mat; 1174 IS is_perm_r,is_perm_c; 1175 1176 if (!rsorted) { 1177 PetscInt *idxs_r,i; 1178 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1179 for (i=0;i<rsize;i++) { 1180 idxs_r[idxs_perm_r[i]] = i; 1181 } 1182 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1183 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1184 } else { 1185 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1186 } 1187 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1188 1189 if (!csorted) { 1190 if (isrow_s == iscol_s) { 1191 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1192 is_perm_c = is_perm_r; 1193 } else { 1194 PetscInt *idxs_c,i; 1195 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1196 for (i=0;i<csize;i++) { 1197 idxs_c[idxs_perm_c[i]] = i; 1198 } 1199 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1200 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1201 } 1202 } else { 1203 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1204 } 1205 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1206 1207 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1208 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1209 work_mat[0] = new_mat; 1210 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1211 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1212 } 1213 1214 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1215 *B = work_mat[0]; 1216 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1217 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1218 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1224 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1225 { 1226 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1227 PC_IS* pcis = (PC_IS*)pc->data; 1228 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1229 Mat new_mat; 1230 IS is_local,is_global; 1231 PetscInt local_size; 1232 PetscBool isseqaij; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1237 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1238 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1239 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1240 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1241 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1242 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1243 1244 /* check */ 1245 if (pcbddc->dbg_flag) { 1246 Vec x,x_change; 1247 PetscReal error; 1248 1249 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1250 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1251 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1252 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1253 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1254 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1255 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1256 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1257 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1258 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1259 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1260 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1261 ierr = VecDestroy(&x);CHKERRQ(ierr); 1262 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1263 } 1264 1265 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1266 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1267 if (isseqaij) { 1268 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1269 } else { 1270 Mat work_mat; 1271 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1272 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1273 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1274 } 1275 if (matis->A->symmetric_set) { 1276 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1277 #if !defined(PETSC_USE_COMPLEX) 1278 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1279 #endif 1280 } 1281 /* 1282 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1283 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1284 */ 1285 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1291 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1292 { 1293 PC_IS* pcis = (PC_IS*)(pc->data); 1294 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1295 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1296 PetscInt *idx_R_local=NULL; 1297 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1298 PetscInt vbs,bs; 1299 PetscBT bitmask=NULL; 1300 PetscErrorCode ierr; 1301 1302 PetscFunctionBegin; 1303 /* 1304 No need to setup local scatters if 1305 - primal space is unchanged 1306 AND 1307 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1308 AND 1309 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1310 */ 1311 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1312 PetscFunctionReturn(0); 1313 } 1314 /* destroy old objects */ 1315 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1316 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1317 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1318 /* Set Non-overlapping dimensions */ 1319 n_B = pcis->n_B; 1320 n_D = pcis->n - n_B; 1321 n_vertices = pcbddc->n_vertices; 1322 1323 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1324 1325 /* create auxiliary bitmask and allocate workspace */ 1326 if (!sub_schurs->reuse_mumps) { 1327 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1328 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1329 for (i=0;i<n_vertices;i++) { 1330 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1331 } 1332 1333 for (i=0, n_R=0; i<pcis->n; i++) { 1334 if (!PetscBTLookup(bitmask,i)) { 1335 idx_R_local[n_R++] = i; 1336 } 1337 } 1338 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1339 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1340 1341 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1342 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1343 } 1344 1345 /* Block code */ 1346 vbs = 1; 1347 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1348 if (bs>1 && !(n_vertices%bs)) { 1349 PetscBool is_blocked = PETSC_TRUE; 1350 PetscInt *vary; 1351 if (!sub_schurs->reuse_mumps) { 1352 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1353 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1354 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1355 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1356 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1357 for (i=0; i<pcis->n/bs; i++) { 1358 if (vary[i]!=0 && vary[i]!=bs) { 1359 is_blocked = PETSC_FALSE; 1360 break; 1361 } 1362 } 1363 ierr = PetscFree(vary);CHKERRQ(ierr); 1364 } else { 1365 /* Verify directly the R set */ 1366 for (i=0; i<n_R/bs; i++) { 1367 PetscInt j,node=idx_R_local[bs*i]; 1368 for (j=1; j<bs; j++) { 1369 if (node != idx_R_local[bs*i+j]-j) { 1370 is_blocked = PETSC_FALSE; 1371 break; 1372 } 1373 } 1374 } 1375 } 1376 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1377 vbs = bs; 1378 for (i=0;i<n_R/vbs;i++) { 1379 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1380 } 1381 } 1382 } 1383 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1384 if (sub_schurs->reuse_mumps) { 1385 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1386 1387 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1388 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1389 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1390 reuse_mumps->is_R = pcbddc->is_R_local; 1391 } else { 1392 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1393 } 1394 1395 /* print some info if requested */ 1396 if (pcbddc->dbg_flag) { 1397 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1398 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1399 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1400 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1401 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1402 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1403 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1404 } 1405 1406 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1407 if (!sub_schurs->reuse_mumps) { 1408 IS is_aux1,is_aux2; 1409 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1410 1411 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1412 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1413 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1414 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1415 for (i=0; i<n_D; i++) { 1416 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1417 } 1418 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1419 for (i=0, j=0; i<n_R; i++) { 1420 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1421 aux_array1[j++] = i; 1422 } 1423 } 1424 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1425 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1426 for (i=0, j=0; i<n_B; i++) { 1427 if (!PetscBTLookup(bitmask,is_indices[i])) { 1428 aux_array2[j++] = i; 1429 } 1430 } 1431 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1432 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1433 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1434 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1435 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1436 1437 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1438 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1439 for (i=0, j=0; i<n_R; i++) { 1440 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1441 aux_array1[j++] = i; 1442 } 1443 } 1444 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1445 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1446 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1447 } 1448 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1449 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1450 } else { 1451 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1452 IS tis; 1453 PetscInt schur_size; 1454 1455 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1456 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1457 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1458 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1459 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1460 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1461 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1462 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1463 } 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1471 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1472 { 1473 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1474 PC_IS *pcis = (PC_IS*)pc->data; 1475 PC pc_temp; 1476 Mat A_RR; 1477 MatReuse reuse; 1478 PetscScalar m_one = -1.0; 1479 PetscReal value; 1480 PetscInt n_D,n_R; 1481 PetscBool use_exact,use_exact_reduced,issbaij; 1482 PetscErrorCode ierr; 1483 /* prefixes stuff */ 1484 char dir_prefix[256],neu_prefix[256],str_level[16]; 1485 size_t len; 1486 1487 PetscFunctionBegin; 1488 1489 /* compute prefixes */ 1490 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1491 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1492 if (!pcbddc->current_level) { 1493 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1494 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1495 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1496 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1497 } else { 1498 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1499 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1500 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1501 len -= 15; /* remove "pc_bddc_coarse_" */ 1502 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1503 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1504 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1505 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1506 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1507 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1508 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1509 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1510 } 1511 1512 /* DIRICHLET PROBLEM */ 1513 if (dirichlet) { 1514 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1515 if (pcbddc->local_mat->symmetric_set) { 1516 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1517 } 1518 /* Matrix for Dirichlet problem is pcis->A_II */ 1519 n_D = pcis->n - pcis->n_B; 1520 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1521 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1522 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1523 /* default */ 1524 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1525 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1526 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1527 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1528 if (issbaij) { 1529 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1530 } else { 1531 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1532 } 1533 /* Allow user's customization */ 1534 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1535 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1536 } 1537 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1538 if (sub_schurs->reuse_mumps) { 1539 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1540 1541 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1542 } 1543 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1544 if (!n_D) { 1545 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1546 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1547 } 1548 /* Set Up KSP for Dirichlet problem of BDDC */ 1549 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1550 /* set ksp_D into pcis data */ 1551 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1552 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1553 pcis->ksp_D = pcbddc->ksp_D; 1554 } 1555 1556 /* NEUMANN PROBLEM */ 1557 A_RR = 0; 1558 if (neumann) { 1559 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1560 PetscInt ibs,mbs; 1561 PetscBool issbaij; 1562 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1563 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1564 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1565 if (pcbddc->ksp_R) { /* already created ksp */ 1566 PetscInt nn_R; 1567 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1568 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1569 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1570 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1571 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1572 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1573 reuse = MAT_INITIAL_MATRIX; 1574 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1575 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1576 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1577 reuse = MAT_INITIAL_MATRIX; 1578 } else { /* safe to reuse the matrix */ 1579 reuse = MAT_REUSE_MATRIX; 1580 } 1581 } 1582 /* last check */ 1583 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1584 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1585 reuse = MAT_INITIAL_MATRIX; 1586 } 1587 } else { /* first time, so we need to create the matrix */ 1588 reuse = MAT_INITIAL_MATRIX; 1589 } 1590 /* extract A_RR */ 1591 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1592 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1594 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1595 if (matis->A == pcbddc->local_mat) { 1596 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1597 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1598 } else { 1599 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1600 } 1601 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1602 if (matis->A == pcbddc->local_mat) { 1603 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1604 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1605 } else { 1606 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1607 } 1608 } 1609 if (!sub_schurs->reuse_mumps) { 1610 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1611 if (pcbddc->local_mat->symmetric_set) { 1612 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1613 } 1614 } else { 1615 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1616 1617 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1618 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1619 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1620 } 1621 if (!pcbddc->ksp_R) { /* create object if not present */ 1622 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1623 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1624 /* default */ 1625 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1626 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1627 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1628 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1629 if (issbaij) { 1630 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1631 } else { 1632 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1633 } 1634 /* Allow user's customization */ 1635 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1636 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1637 } 1638 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1639 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1640 if (!n_R) { 1641 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1642 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1643 } 1644 /* Reuse MUMPS solver if it is present */ 1645 if (sub_schurs->reuse_mumps) { 1646 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1647 1648 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1649 } 1650 /* Set Up KSP for Neumann problem of BDDC */ 1651 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1652 } 1653 /* free Neumann problem's matrix */ 1654 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1655 1656 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1657 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1658 if (pcbddc->dbg_flag) { 1659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1660 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1661 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1662 } 1663 if (dirichlet) { /* Dirichlet */ 1664 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1665 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1666 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1667 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1668 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1669 /* need to be adapted? */ 1670 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1671 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1672 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1673 /* print info */ 1674 if (pcbddc->dbg_flag) { 1675 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1676 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1677 } 1678 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1679 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1680 } 1681 } 1682 if (neumann) { /* Neumann */ 1683 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1684 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1685 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1686 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1687 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1688 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1689 /* need to be adapted? */ 1690 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1691 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1692 /* print info */ 1693 if (pcbddc->dbg_flag) { 1694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1695 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1696 } 1697 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1698 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1699 } 1700 } 1701 } 1702 PetscFunctionReturn(0); 1703 } 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1707 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1708 { 1709 PetscErrorCode ierr; 1710 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1711 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1712 1713 PetscFunctionBegin; 1714 if (!sub_schurs->reuse_mumps) { 1715 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1716 } 1717 if (!pcbddc->switch_static) { 1718 if (applytranspose && pcbddc->local_auxmat1) { 1719 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1720 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1721 } 1722 if (!sub_schurs->reuse_mumps) { 1723 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1724 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1725 } else { 1726 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1727 1728 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1729 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1730 } 1731 } else { 1732 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1733 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1734 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1735 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1736 if (applytranspose && pcbddc->local_auxmat1) { 1737 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1738 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1739 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1740 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1741 } 1742 } 1743 if (!sub_schurs->reuse_mumps) { 1744 if (applytranspose) { 1745 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1746 } else { 1747 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1748 } 1749 } else { 1750 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1751 1752 if (applytranspose) { 1753 ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1754 } else { 1755 ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1756 } 1757 } 1758 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1759 if (!pcbddc->switch_static) { 1760 if (!sub_schurs->reuse_mumps) { 1761 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1762 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1763 } else { 1764 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1765 1766 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1767 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1768 } 1769 if (!applytranspose && pcbddc->local_auxmat1) { 1770 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1771 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1772 } 1773 } else { 1774 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1775 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1776 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1778 if (!applytranspose && pcbddc->local_auxmat1) { 1779 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1780 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1781 } 1782 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1783 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1784 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1785 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1786 } 1787 PetscFunctionReturn(0); 1788 } 1789 1790 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1791 #undef __FUNCT__ 1792 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1793 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1794 { 1795 PetscErrorCode ierr; 1796 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1797 PC_IS* pcis = (PC_IS*) (pc->data); 1798 const PetscScalar zero = 0.0; 1799 1800 PetscFunctionBegin; 1801 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1802 if (applytranspose) { 1803 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1804 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1805 } else { 1806 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1807 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1808 } 1809 /* start communications from local primal nodes to rhs of coarse solver */ 1810 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1811 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1812 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1813 1814 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1815 /* TODO remove null space when doing multilevel */ 1816 if (pcbddc->coarse_ksp) { 1817 Vec rhs,sol; 1818 1819 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1820 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1821 if (applytranspose) { 1822 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1823 } else { 1824 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1825 } 1826 } 1827 1828 /* Local solution on R nodes */ 1829 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1830 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1831 } 1832 1833 /* communications from coarse sol to local primal nodes */ 1834 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1835 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1836 1837 /* Sum contributions from two levels */ 1838 if (applytranspose) { 1839 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1840 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1841 } else { 1842 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1843 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1850 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1851 { 1852 PetscErrorCode ierr; 1853 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1854 PetscScalar *array; 1855 Vec from,to; 1856 1857 PetscFunctionBegin; 1858 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1859 from = pcbddc->coarse_vec; 1860 to = pcbddc->vec1_P; 1861 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1862 Vec tvec; 1863 1864 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1865 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1866 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1867 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1868 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1869 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1870 } 1871 } else { /* from local to global -> put data in coarse right hand side */ 1872 from = pcbddc->vec1_P; 1873 to = pcbddc->coarse_vec; 1874 } 1875 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1876 PetscFunctionReturn(0); 1877 } 1878 1879 #undef __FUNCT__ 1880 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1881 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1882 { 1883 PetscErrorCode ierr; 1884 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1885 PetscScalar *array; 1886 Vec from,to; 1887 1888 PetscFunctionBegin; 1889 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1890 from = pcbddc->coarse_vec; 1891 to = pcbddc->vec1_P; 1892 } else { /* from local to global -> put data in coarse right hand side */ 1893 from = pcbddc->vec1_P; 1894 to = pcbddc->coarse_vec; 1895 } 1896 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1897 if (smode == SCATTER_FORWARD) { 1898 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1899 Vec tvec; 1900 1901 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1902 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1903 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1904 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1905 } 1906 } else { 1907 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1908 ierr = VecResetArray(from);CHKERRQ(ierr); 1909 } 1910 } 1911 PetscFunctionReturn(0); 1912 } 1913 1914 /* uncomment for testing purposes */ 1915 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1918 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1919 { 1920 PetscErrorCode ierr; 1921 PC_IS* pcis = (PC_IS*)(pc->data); 1922 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1923 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1924 /* one and zero */ 1925 PetscScalar one=1.0,zero=0.0; 1926 /* space to store constraints and their local indices */ 1927 PetscScalar *constraints_data; 1928 PetscInt *constraints_idxs,*constraints_idxs_B; 1929 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1930 PetscInt *constraints_n; 1931 /* iterators */ 1932 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1933 /* BLAS integers */ 1934 PetscBLASInt lwork,lierr; 1935 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1936 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1937 /* reuse */ 1938 PetscInt olocal_primal_size,olocal_primal_size_cc; 1939 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1940 /* change of basis */ 1941 PetscBool qr_needed; 1942 PetscBT change_basis,qr_needed_idx; 1943 /* auxiliary stuff */ 1944 PetscInt *nnz,*is_indices; 1945 PetscInt ncc; 1946 /* some quantities */ 1947 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1948 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1949 1950 PetscFunctionBegin; 1951 /* Destroy Mat objects computed previously */ 1952 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1953 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1954 /* save info on constraints from previous setup (if any) */ 1955 olocal_primal_size = pcbddc->local_primal_size; 1956 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1957 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1958 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1959 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1960 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1961 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1962 1963 /* print some info */ 1964 if (pcbddc->dbg_flag) { 1965 IS vertices; 1966 PetscInt nv,nedges,nfaces; 1967 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1968 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1969 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1970 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1971 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1973 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1974 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1975 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1976 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1977 } 1978 1979 if (!pcbddc->adaptive_selection) { 1980 IS ISForVertices,*ISForFaces,*ISForEdges; 1981 MatNullSpace nearnullsp; 1982 const Vec *nearnullvecs; 1983 Vec *localnearnullsp; 1984 PetscScalar *array; 1985 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1986 PetscBool nnsp_has_cnst; 1987 /* LAPACK working arrays for SVD or POD */ 1988 PetscBool skip_lapack,boolforchange; 1989 PetscScalar *work; 1990 PetscReal *singular_vals; 1991 #if defined(PETSC_USE_COMPLEX) 1992 PetscReal *rwork; 1993 #endif 1994 #if defined(PETSC_MISSING_LAPACK_GESVD) 1995 PetscScalar *temp_basis,*correlation_mat; 1996 #else 1997 PetscBLASInt dummy_int=1; 1998 PetscScalar dummy_scalar=1.; 1999 #endif 2000 2001 /* Get index sets for faces, edges and vertices from graph */ 2002 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2003 /* free unneeded index sets */ 2004 if (!pcbddc->use_vertices) { 2005 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2006 } 2007 if (!pcbddc->use_edges) { 2008 for (i=0;i<n_ISForEdges;i++) { 2009 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2010 } 2011 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2012 n_ISForEdges = 0; 2013 } 2014 if (!pcbddc->use_faces) { 2015 for (i=0;i<n_ISForFaces;i++) { 2016 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2017 } 2018 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2019 n_ISForFaces = 0; 2020 } 2021 2022 #if defined(PETSC_USE_DEBUG) 2023 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2024 Also use_change_of_basis should be consistent among processors */ 2025 if (pcbddc->NullSpace) { 2026 PetscBool tbool[2],gbool[2]; 2027 2028 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2029 pcbddc->use_change_of_basis = PETSC_TRUE; 2030 if (!ISForEdges) { 2031 pcbddc->use_change_on_faces = PETSC_TRUE; 2032 } 2033 } 2034 tbool[0] = pcbddc->use_change_of_basis; 2035 tbool[1] = pcbddc->use_change_on_faces; 2036 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2037 pcbddc->use_change_of_basis = gbool[0]; 2038 pcbddc->use_change_on_faces = gbool[1]; 2039 } 2040 #endif 2041 2042 /* check if near null space is attached to global mat */ 2043 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2044 if (nearnullsp) { 2045 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2046 /* remove any stored info */ 2047 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2048 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2049 /* store information for BDDC solver reuse */ 2050 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2051 pcbddc->onearnullspace = nearnullsp; 2052 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2053 for (i=0;i<nnsp_size;i++) { 2054 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2055 } 2056 } else { /* if near null space is not provided BDDC uses constants by default */ 2057 nnsp_size = 0; 2058 nnsp_has_cnst = PETSC_TRUE; 2059 } 2060 /* get max number of constraints on a single cc */ 2061 max_constraints = nnsp_size; 2062 if (nnsp_has_cnst) max_constraints++; 2063 2064 /* 2065 Evaluate maximum storage size needed by the procedure 2066 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2067 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2068 There can be multiple constraints per connected component 2069 */ 2070 n_vertices = 0; 2071 if (ISForVertices) { 2072 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2073 } 2074 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2075 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2076 2077 total_counts = n_ISForFaces+n_ISForEdges; 2078 total_counts *= max_constraints; 2079 total_counts += n_vertices; 2080 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2081 2082 total_counts = 0; 2083 max_size_of_constraint = 0; 2084 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2085 IS used_is; 2086 if (i<n_ISForEdges) { 2087 used_is = ISForEdges[i]; 2088 } else { 2089 used_is = ISForFaces[i-n_ISForEdges]; 2090 } 2091 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2092 total_counts += j; 2093 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2094 } 2095 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2096 2097 /* get local part of global near null space vectors */ 2098 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2099 for (k=0;k<nnsp_size;k++) { 2100 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2101 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2102 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2103 } 2104 2105 /* whether or not to skip lapack calls */ 2106 skip_lapack = PETSC_TRUE; 2107 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2108 2109 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2110 if (!skip_lapack) { 2111 PetscScalar temp_work; 2112 2113 #if defined(PETSC_MISSING_LAPACK_GESVD) 2114 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2115 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2116 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2117 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2118 #if defined(PETSC_USE_COMPLEX) 2119 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2120 #endif 2121 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2122 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2123 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2124 lwork = -1; 2125 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2126 #if !defined(PETSC_USE_COMPLEX) 2127 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2128 #else 2129 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2130 #endif 2131 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2132 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2133 #else /* on missing GESVD */ 2134 /* SVD */ 2135 PetscInt max_n,min_n; 2136 max_n = max_size_of_constraint; 2137 min_n = max_constraints; 2138 if (max_size_of_constraint < max_constraints) { 2139 min_n = max_size_of_constraint; 2140 max_n = max_constraints; 2141 } 2142 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2143 #if defined(PETSC_USE_COMPLEX) 2144 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2145 #endif 2146 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2147 lwork = -1; 2148 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2149 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2150 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2151 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2152 #if !defined(PETSC_USE_COMPLEX) 2153 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2154 #else 2155 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2156 #endif 2157 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2158 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2159 #endif /* on missing GESVD */ 2160 /* Allocate optimal workspace */ 2161 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2162 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2163 } 2164 /* Now we can loop on constraining sets */ 2165 total_counts = 0; 2166 constraints_idxs_ptr[0] = 0; 2167 constraints_data_ptr[0] = 0; 2168 /* vertices */ 2169 if (n_vertices) { 2170 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2171 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2172 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2173 for (i=0;i<n_vertices;i++) { 2174 constraints_n[total_counts] = 1; 2175 constraints_data[total_counts] = 1.0; 2176 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2177 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2178 total_counts++; 2179 } 2180 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2181 PetscBool used_vertex; 2182 for (i=0;i<n_vertices;i++) { 2183 used_vertex = PETSC_FALSE; 2184 k = 0; 2185 while (!used_vertex && k<nnsp_size) { 2186 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2187 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2188 constraints_n[total_counts] = 1; 2189 constraints_idxs[total_counts] = is_indices[i]; 2190 constraints_data[total_counts] = 1.0; 2191 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2192 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2193 total_counts++; 2194 used_vertex = PETSC_TRUE; 2195 } 2196 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2197 k++; 2198 } 2199 } 2200 } 2201 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2202 n_vertices = total_counts; 2203 } 2204 2205 /* edges and faces */ 2206 total_counts_cc = total_counts; 2207 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2208 IS used_is; 2209 PetscBool idxs_copied = PETSC_FALSE; 2210 2211 if (ncc<n_ISForEdges) { 2212 used_is = ISForEdges[ncc]; 2213 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2214 } else { 2215 used_is = ISForFaces[ncc-n_ISForEdges]; 2216 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2217 } 2218 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2219 2220 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2221 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2222 /* change of basis should not be performed on local periodic nodes */ 2223 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2224 if (nnsp_has_cnst) { 2225 PetscScalar quad_value; 2226 2227 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2228 idxs_copied = PETSC_TRUE; 2229 2230 if (!pcbddc->use_nnsp_true) { 2231 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2232 } else { 2233 quad_value = 1.0; 2234 } 2235 for (j=0;j<size_of_constraint;j++) { 2236 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2237 } 2238 temp_constraints++; 2239 total_counts++; 2240 } 2241 for (k=0;k<nnsp_size;k++) { 2242 PetscReal real_value; 2243 PetscScalar *ptr_to_data; 2244 2245 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2246 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2247 for (j=0;j<size_of_constraint;j++) { 2248 ptr_to_data[j] = array[is_indices[j]]; 2249 } 2250 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2251 /* check if array is null on the connected component */ 2252 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2253 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2254 if (real_value > 0.0) { /* keep indices and values */ 2255 temp_constraints++; 2256 total_counts++; 2257 if (!idxs_copied) { 2258 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2259 idxs_copied = PETSC_TRUE; 2260 } 2261 } 2262 } 2263 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2264 valid_constraints = temp_constraints; 2265 if (!pcbddc->use_nnsp_true && temp_constraints) { 2266 if (temp_constraints == 1) { /* just normalize the constraint */ 2267 PetscScalar norm,*ptr_to_data; 2268 2269 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2270 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2271 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2272 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2273 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2274 } else { /* perform SVD */ 2275 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2276 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2277 2278 #if defined(PETSC_MISSING_LAPACK_GESVD) 2279 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2280 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2281 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2282 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2283 from that computed using LAPACKgesvd 2284 -> This is due to a different computation of eigenvectors in LAPACKheev 2285 -> The quality of the POD-computed basis will be the same */ 2286 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2287 /* Store upper triangular part of correlation matrix */ 2288 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2289 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2290 for (j=0;j<temp_constraints;j++) { 2291 for (k=0;k<j+1;k++) { 2292 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2293 } 2294 } 2295 /* compute eigenvalues and eigenvectors of correlation matrix */ 2296 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2297 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2298 #if !defined(PETSC_USE_COMPLEX) 2299 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2300 #else 2301 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2302 #endif 2303 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2304 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2305 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2306 j = 0; 2307 while (j < temp_constraints && singular_vals[j] < tol) j++; 2308 total_counts = total_counts-j; 2309 valid_constraints = temp_constraints-j; 2310 /* scale and copy POD basis into used quadrature memory */ 2311 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2312 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2313 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2314 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2315 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2316 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2317 if (j<temp_constraints) { 2318 PetscInt ii; 2319 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2320 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2321 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2322 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2323 for (k=0;k<temp_constraints-j;k++) { 2324 for (ii=0;ii<size_of_constraint;ii++) { 2325 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2326 } 2327 } 2328 } 2329 #else /* on missing GESVD */ 2330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2331 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2332 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2333 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2334 #if !defined(PETSC_USE_COMPLEX) 2335 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2336 #else 2337 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2338 #endif 2339 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2340 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2341 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2342 k = temp_constraints; 2343 if (k > size_of_constraint) k = size_of_constraint; 2344 j = 0; 2345 while (j < k && singular_vals[k-j-1] < tol) j++; 2346 valid_constraints = k-j; 2347 total_counts = total_counts-temp_constraints+valid_constraints; 2348 #endif /* on missing GESVD */ 2349 } 2350 } 2351 /* update pointers information */ 2352 if (valid_constraints) { 2353 constraints_n[total_counts_cc] = valid_constraints; 2354 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2355 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2356 /* set change_of_basis flag */ 2357 if (boolforchange) { 2358 PetscBTSet(change_basis,total_counts_cc); 2359 } 2360 total_counts_cc++; 2361 } 2362 } 2363 /* free workspace */ 2364 if (!skip_lapack) { 2365 ierr = PetscFree(work);CHKERRQ(ierr); 2366 #if defined(PETSC_USE_COMPLEX) 2367 ierr = PetscFree(rwork);CHKERRQ(ierr); 2368 #endif 2369 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2370 #if defined(PETSC_MISSING_LAPACK_GESVD) 2371 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2372 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2373 #endif 2374 } 2375 for (k=0;k<nnsp_size;k++) { 2376 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2377 } 2378 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2379 /* free index sets of faces, edges and vertices */ 2380 for (i=0;i<n_ISForFaces;i++) { 2381 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2382 } 2383 if (n_ISForFaces) { 2384 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2385 } 2386 for (i=0;i<n_ISForEdges;i++) { 2387 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2388 } 2389 if (n_ISForEdges) { 2390 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2391 } 2392 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2393 } else { 2394 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2395 2396 total_counts = 0; 2397 n_vertices = 0; 2398 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2399 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2400 } 2401 max_constraints = 0; 2402 total_counts_cc = 0; 2403 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2404 total_counts += pcbddc->adaptive_constraints_n[i]; 2405 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2406 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2407 } 2408 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2409 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2410 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2411 constraints_data = pcbddc->adaptive_constraints_data; 2412 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2413 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2414 total_counts_cc = 0; 2415 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2416 if (pcbddc->adaptive_constraints_n[i]) { 2417 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2418 } 2419 } 2420 #if 0 2421 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2422 for (i=0;i<total_counts_cc;i++) { 2423 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2424 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2425 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2426 printf(" %d",constraints_idxs[j]); 2427 } 2428 printf("\n"); 2429 printf("number of cc: %d\n",constraints_n[i]); 2430 } 2431 for (i=0;i<n_vertices;i++) { 2432 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2433 } 2434 for (i=0;i<sub_schurs->n_subs;i++) { 2435 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2436 } 2437 #endif 2438 2439 max_size_of_constraint = 0; 2440 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2441 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2442 /* Change of basis */ 2443 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2444 if (pcbddc->use_change_of_basis) { 2445 for (i=0;i<sub_schurs->n_subs;i++) { 2446 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2447 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2448 } 2449 } 2450 } 2451 } 2452 pcbddc->local_primal_size = total_counts; 2453 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2454 2455 /* map constraints_idxs in boundary numbering */ 2456 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2457 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 2458 2459 /* Create constraint matrix */ 2460 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2461 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2462 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2463 2464 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2465 /* determine if a QR strategy is needed for change of basis */ 2466 qr_needed = PETSC_FALSE; 2467 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2468 total_primal_vertices=0; 2469 pcbddc->local_primal_size_cc = 0; 2470 for (i=0;i<total_counts_cc;i++) { 2471 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2472 if (size_of_constraint == 1) { 2473 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2474 pcbddc->local_primal_size_cc += 1; 2475 } else if (PetscBTLookup(change_basis,i)) { 2476 for (k=0;k<constraints_n[i];k++) { 2477 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2478 } 2479 pcbddc->local_primal_size_cc += constraints_n[i]; 2480 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2481 PetscBTSet(qr_needed_idx,i); 2482 qr_needed = PETSC_TRUE; 2483 } 2484 } else { 2485 pcbddc->local_primal_size_cc += 1; 2486 } 2487 } 2488 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2489 pcbddc->n_vertices = total_primal_vertices; 2490 /* permute indices in order to have a sorted set of vertices */ 2491 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2492 2493 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2494 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2495 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2496 2497 /* nonzero structure of constraint matrix */ 2498 /* and get reference dof for local constraints */ 2499 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2500 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2501 2502 j = total_primal_vertices; 2503 total_counts = total_primal_vertices; 2504 cum = total_primal_vertices; 2505 for (i=n_vertices;i<total_counts_cc;i++) { 2506 if (!PetscBTLookup(change_basis,i)) { 2507 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2508 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2509 cum++; 2510 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2511 for (k=0;k<constraints_n[i];k++) { 2512 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2513 nnz[j+k] = size_of_constraint; 2514 } 2515 j += constraints_n[i]; 2516 } 2517 } 2518 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2519 ierr = PetscFree(nnz);CHKERRQ(ierr); 2520 2521 /* set values in constraint matrix */ 2522 for (i=0;i<total_primal_vertices;i++) { 2523 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2524 } 2525 total_counts = total_primal_vertices; 2526 for (i=n_vertices;i<total_counts_cc;i++) { 2527 if (!PetscBTLookup(change_basis,i)) { 2528 PetscInt *cols; 2529 2530 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2531 cols = constraints_idxs+constraints_idxs_ptr[i]; 2532 for (k=0;k<constraints_n[i];k++) { 2533 PetscInt row = total_counts+k; 2534 PetscScalar *vals; 2535 2536 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2537 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2538 } 2539 total_counts += constraints_n[i]; 2540 } 2541 } 2542 /* assembling */ 2543 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2544 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2545 2546 /* 2547 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2548 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2549 */ 2550 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2551 if (pcbddc->use_change_of_basis) { 2552 /* dual and primal dofs on a single cc */ 2553 PetscInt dual_dofs,primal_dofs; 2554 /* working stuff for GEQRF */ 2555 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2556 PetscBLASInt lqr_work; 2557 /* working stuff for UNGQR */ 2558 PetscScalar *gqr_work,lgqr_work_t; 2559 PetscBLASInt lgqr_work; 2560 /* working stuff for TRTRS */ 2561 PetscScalar *trs_rhs; 2562 PetscBLASInt Blas_NRHS; 2563 /* pointers for values insertion into change of basis matrix */ 2564 PetscInt *start_rows,*start_cols; 2565 PetscScalar *start_vals; 2566 /* working stuff for values insertion */ 2567 PetscBT is_primal; 2568 PetscInt *aux_primal_numbering_B; 2569 /* matrix sizes */ 2570 PetscInt global_size,local_size; 2571 /* temporary change of basis */ 2572 Mat localChangeOfBasisMatrix; 2573 /* extra space for debugging */ 2574 PetscScalar *dbg_work; 2575 2576 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2577 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2578 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2579 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2580 /* nonzeros for local mat */ 2581 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2582 for (i=0;i<pcis->n;i++) nnz[i]=1; 2583 for (i=n_vertices;i<total_counts_cc;i++) { 2584 if (PetscBTLookup(change_basis,i)) { 2585 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2586 if (PetscBTLookup(qr_needed_idx,i)) { 2587 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2588 } else { 2589 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2590 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2591 } 2592 } 2593 } 2594 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2595 ierr = PetscFree(nnz);CHKERRQ(ierr); 2596 /* Set initial identity in the matrix */ 2597 for (i=0;i<pcis->n;i++) { 2598 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2599 } 2600 2601 if (pcbddc->dbg_flag) { 2602 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2604 } 2605 2606 2607 /* Now we loop on the constraints which need a change of basis */ 2608 /* 2609 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2610 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2611 2612 Basic blocks of change of basis matrix T computed by 2613 2614 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2615 2616 | 1 0 ... 0 s_1/S | 2617 | 0 1 ... 0 s_2/S | 2618 | ... | 2619 | 0 ... 1 s_{n-1}/S | 2620 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2621 2622 with S = \sum_{i=1}^n s_i^2 2623 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2624 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2625 2626 - QR decomposition of constraints otherwise 2627 */ 2628 if (qr_needed) { 2629 /* space to store Q */ 2630 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2631 /* first we issue queries for optimal work */ 2632 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2633 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2634 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2635 lqr_work = -1; 2636 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2637 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2638 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2639 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2640 lgqr_work = -1; 2641 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2642 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2643 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2644 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2645 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2646 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2647 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2648 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2649 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2650 /* array to store scaling factors for reflectors */ 2651 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2652 /* array to store rhs and solution of triangular solver */ 2653 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2654 /* allocating workspace for check */ 2655 if (pcbddc->dbg_flag) { 2656 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2657 } 2658 } 2659 /* array to store whether a node is primal or not */ 2660 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2661 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2662 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2663 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 2664 for (i=0;i<total_primal_vertices;i++) { 2665 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2666 } 2667 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2668 2669 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2670 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2671 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2672 if (PetscBTLookup(change_basis,total_counts)) { 2673 /* get constraint info */ 2674 primal_dofs = constraints_n[total_counts]; 2675 dual_dofs = size_of_constraint-primal_dofs; 2676 2677 if (pcbddc->dbg_flag) { 2678 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2679 } 2680 2681 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2682 2683 /* copy quadrature constraints for change of basis check */ 2684 if (pcbddc->dbg_flag) { 2685 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2686 } 2687 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2688 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2689 2690 /* compute QR decomposition of constraints */ 2691 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2692 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2693 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2694 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2695 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2696 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2697 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2698 2699 /* explictly compute R^-T */ 2700 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2701 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2702 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2703 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2704 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2705 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2706 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2707 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2708 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2709 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2710 2711 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2712 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2713 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2714 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2715 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2716 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2717 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2718 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2719 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2720 2721 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2722 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2723 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2725 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2726 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2727 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2728 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2729 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2730 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2731 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2732 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2733 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2734 2735 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2736 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2737 /* insert cols for primal dofs */ 2738 for (j=0;j<primal_dofs;j++) { 2739 start_vals = &qr_basis[j*size_of_constraint]; 2740 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2741 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2742 } 2743 /* insert cols for dual dofs */ 2744 for (j=0,k=0;j<dual_dofs;k++) { 2745 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2746 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2747 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2748 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2749 j++; 2750 } 2751 } 2752 2753 /* check change of basis */ 2754 if (pcbddc->dbg_flag) { 2755 PetscInt ii,jj; 2756 PetscBool valid_qr=PETSC_TRUE; 2757 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2758 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2759 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2760 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2761 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2762 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2763 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2764 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2765 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2766 for (jj=0;jj<size_of_constraint;jj++) { 2767 for (ii=0;ii<primal_dofs;ii++) { 2768 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2769 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2770 } 2771 } 2772 if (!valid_qr) { 2773 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2774 for (jj=0;jj<size_of_constraint;jj++) { 2775 for (ii=0;ii<primal_dofs;ii++) { 2776 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2777 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2778 } 2779 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2780 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2781 } 2782 } 2783 } 2784 } else { 2785 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2786 } 2787 } 2788 } else { /* simple transformation block */ 2789 PetscInt row,col; 2790 PetscScalar val,norm; 2791 2792 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2793 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2794 for (j=0;j<size_of_constraint;j++) { 2795 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2796 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2797 if (!PetscBTLookup(is_primal,row_B)) { 2798 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2799 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2800 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2801 } else { 2802 for (k=0;k<size_of_constraint;k++) { 2803 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2804 if (row != col) { 2805 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2806 } else { 2807 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2808 } 2809 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2810 } 2811 } 2812 } 2813 if (pcbddc->dbg_flag) { 2814 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2815 } 2816 } 2817 } else { 2818 if (pcbddc->dbg_flag) { 2819 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2820 } 2821 } 2822 } 2823 2824 /* free workspace */ 2825 if (qr_needed) { 2826 if (pcbddc->dbg_flag) { 2827 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2828 } 2829 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2830 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2831 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2832 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2833 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2834 } 2835 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2836 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2837 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2838 2839 /* assembling of global change of variable */ 2840 { 2841 Mat tmat; 2842 PetscInt bs; 2843 2844 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2845 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2846 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2847 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2848 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2849 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2850 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2851 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2852 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2853 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2854 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2855 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2856 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2857 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2858 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2859 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2860 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2861 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2862 } 2863 /* check */ 2864 if (pcbddc->dbg_flag) { 2865 PetscReal error; 2866 Vec x,x_change; 2867 2868 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2869 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2870 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2871 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2872 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2873 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2874 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2875 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2876 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2877 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2878 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2879 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2880 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2881 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2882 ierr = VecDestroy(&x);CHKERRQ(ierr); 2883 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2884 } 2885 2886 /* adapt sub_schurs computed (if any) */ 2887 if (pcbddc->use_deluxe_scaling) { 2888 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2889 if (sub_schurs->S_Ej_all) { 2890 Mat S_new,tmat; 2891 IS is_all_N; 2892 2893 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2894 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2895 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2896 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2897 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2898 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2899 sub_schurs->S_Ej_all = S_new; 2900 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2901 if (sub_schurs->sum_S_Ej_all) { 2902 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2903 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2904 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2905 sub_schurs->sum_S_Ej_all = S_new; 2906 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2907 } 2908 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2909 } 2910 } 2911 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2912 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2913 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2914 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2915 } 2916 2917 /* set up change of basis context */ 2918 if (pcbddc->ChangeOfBasisMatrix) { 2919 PCBDDCChange_ctx change_ctx; 2920 2921 if (!pcbddc->new_global_mat) { 2922 PetscInt global_size,local_size; 2923 2924 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2925 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2926 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2927 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2928 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2929 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2930 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2931 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2932 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2933 } else { 2934 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2935 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2936 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2937 } 2938 if (!pcbddc->user_ChangeOfBasisMatrix) { 2939 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2940 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2941 } else { 2942 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2943 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2944 } 2945 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2946 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2947 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2948 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2949 } 2950 2951 /* check if a new primal space has been introduced */ 2952 pcbddc->new_primal_space_local = PETSC_TRUE; 2953 if (olocal_primal_size == pcbddc->local_primal_size) { 2954 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2955 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2956 if (!pcbddc->new_primal_space_local) { 2957 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2958 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2959 } 2960 } 2961 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2962 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2963 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2964 2965 /* flush dbg viewer */ 2966 if (pcbddc->dbg_flag) { 2967 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2968 } 2969 2970 /* free workspace */ 2971 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2972 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2973 if (!pcbddc->adaptive_selection) { 2974 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2975 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2976 } else { 2977 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2978 pcbddc->adaptive_constraints_idxs_ptr, 2979 pcbddc->adaptive_constraints_data_ptr, 2980 pcbddc->adaptive_constraints_idxs, 2981 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2982 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2983 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2984 } 2985 PetscFunctionReturn(0); 2986 } 2987 2988 #undef __FUNCT__ 2989 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2990 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2991 { 2992 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2993 PC_IS *pcis = (PC_IS*)pc->data; 2994 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2995 PetscInt ierr,i,vertex_size,N; 2996 PetscViewer viewer=pcbddc->dbg_viewer; 2997 2998 PetscFunctionBegin; 2999 /* Reset previously computed graph */ 3000 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3001 /* Init local Graph struct */ 3002 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3003 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3004 3005 /* Check validity of the csr graph passed in by the user */ 3006 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3007 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3008 } 3009 3010 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3011 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3012 PetscInt *xadj,*adjncy; 3013 PetscInt nvtxs; 3014 PetscBool flg_row=PETSC_FALSE; 3015 3016 if (pcbddc->use_local_adj) { 3017 3018 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3019 if (flg_row) { 3020 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3021 pcbddc->computed_rowadj = PETSC_TRUE; 3022 } 3023 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3024 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3025 IS is_dummy; 3026 ISLocalToGlobalMapping l2gmap_dummy; 3027 PetscInt j,sum; 3028 PetscInt *cxadj,*cadjncy; 3029 const PetscInt *idxs; 3030 PCBDDCGraph graph; 3031 PetscBT is_on_boundary; 3032 3033 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3034 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3035 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3036 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3037 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3038 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3039 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3040 if (flg_row) { 3041 graph->xadj = xadj; 3042 graph->adjncy = adjncy; 3043 } 3044 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3045 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3046 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3047 3048 if (pcbddc->dbg_flag) { 3049 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3050 for (i=0;i<graph->ncc;i++) { 3051 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3052 } 3053 } 3054 3055 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3056 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3057 for (i=0;i<pcis->n_B;i++) { 3058 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3059 } 3060 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3061 3062 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3063 sum = 0; 3064 for (i=0;i<graph->ncc;i++) { 3065 PetscInt sizecc = 0; 3066 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3067 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3068 sizecc++; 3069 } 3070 } 3071 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3072 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3073 cxadj[graph->queue[j]] = sizecc; 3074 } 3075 } 3076 sum += sizecc*sizecc; 3077 } 3078 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3079 sum = 0; 3080 for (i=0;i<pcis->n;i++) { 3081 PetscInt temp = cxadj[i]; 3082 cxadj[i] = sum; 3083 sum += temp; 3084 } 3085 cxadj[pcis->n] = sum; 3086 for (i=0;i<graph->ncc;i++) { 3087 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3088 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3089 PetscInt k,sizecc = 0; 3090 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3091 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3092 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3093 sizecc++; 3094 } 3095 } 3096 } 3097 } 3098 } 3099 if (sum) { 3100 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3101 } else { 3102 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3103 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3104 } 3105 graph->xadj = 0; 3106 graph->adjncy = 0; 3107 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3108 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3109 } 3110 } 3111 if (pcbddc->dbg_flag) { 3112 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3113 } 3114 3115 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3116 vertex_size = 1; 3117 if (pcbddc->user_provided_isfordofs) { 3118 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3119 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3120 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3121 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3122 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3123 } 3124 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3125 pcbddc->n_ISForDofs = 0; 3126 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3127 } 3128 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3129 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3130 } else { 3131 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3132 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3133 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3134 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3135 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3136 } 3137 } 3138 } 3139 3140 /* Setup of Graph */ 3141 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3142 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3143 } 3144 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3145 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3146 } 3147 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3148 3149 /* Graph's connected components analysis */ 3150 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3151 3152 /* print some info to stdout */ 3153 if (pcbddc->dbg_flag) { 3154 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3155 } 3156 3157 /* mark topography has done */ 3158 pcbddc->recompute_topography = PETSC_FALSE; 3159 PetscFunctionReturn(0); 3160 } 3161 3162 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3163 #undef __FUNCT__ 3164 #define __FUNCT__ "PCBDDCSubsetNumbering" 3165 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3166 { 3167 PetscSF sf; 3168 PetscLayout map; 3169 const PetscInt *idxs; 3170 PetscInt *leaf_data,*root_data,*gidxs; 3171 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3172 PetscInt n_n,nlocals,start,first_index; 3173 PetscMPIInt commsize; 3174 PetscBool first_found; 3175 PetscErrorCode ierr; 3176 3177 PetscFunctionBegin; 3178 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3179 if (subset_mult) { 3180 PetscCheckSameComm(subset,1,subset_mult,2); 3181 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3182 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3183 } 3184 /* create workspace layout for computing global indices of subset */ 3185 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3186 lbounds[0] = lbounds[1] = 0; 3187 for (i=0;i<n;i++) { 3188 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3189 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3190 } 3191 lbounds[0] = -lbounds[0]; 3192 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3193 gbounds[0] = -gbounds[0]; 3194 N = gbounds[1] - gbounds[0] + 1; 3195 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3196 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3197 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3198 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3199 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3200 3201 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3202 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3203 if (subset_mult) { 3204 const PetscInt* idxs_mult; 3205 3206 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3207 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3208 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3209 } else { 3210 for (i=0;i<n;i++) leaf_data[i] = 1; 3211 } 3212 /* local size of new subset */ 3213 n_n = 0; 3214 for (i=0;i<n;i++) n_n += leaf_data[i]; 3215 3216 /* global indexes in layout */ 3217 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3218 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3219 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3220 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3221 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3222 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3223 3224 /* reduce from leaves to roots */ 3225 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3226 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3227 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3228 3229 /* count indexes in local part of layout */ 3230 nlocals = 0; 3231 first_index = -1; 3232 first_found = PETSC_FALSE; 3233 for (i=0;i<Nl;i++) { 3234 if (!first_found && root_data[i]) { 3235 first_found = PETSC_TRUE; 3236 first_index = i; 3237 } 3238 nlocals += root_data[i]; 3239 } 3240 3241 /* cumulative of number of indexes and size of subset without holes */ 3242 #if defined(PETSC_HAVE_MPI_EXSCAN) 3243 start = 0; 3244 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3245 #else 3246 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3247 start = start-nlocals; 3248 #endif 3249 3250 if (N_n) { /* compute total size of new subset if requested */ 3251 *N_n = start + nlocals; 3252 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3253 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3254 } 3255 3256 /* adapt root data with cumulative */ 3257 if (first_found) { 3258 PetscInt old_index; 3259 3260 root_data[first_index] += start; 3261 old_index = first_index; 3262 for (i=first_index+1;i<Nl;i++) { 3263 if (root_data[i]) { 3264 root_data[i] += root_data[old_index]; 3265 old_index = i; 3266 } 3267 } 3268 } 3269 3270 /* from roots to leaves */ 3271 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3272 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3273 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3274 3275 /* create new IS with global indexes without holes */ 3276 if (subset_mult) { 3277 const PetscInt* idxs_mult; 3278 PetscInt cum; 3279 3280 cum = 0; 3281 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3282 for (i=0;i<n;i++) { 3283 PetscInt j; 3284 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3285 } 3286 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3287 } else { 3288 for (i=0;i<n;i++) { 3289 gidxs[i] = leaf_data[i]-1; 3290 } 3291 } 3292 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3293 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3294 PetscFunctionReturn(0); 3295 } 3296 3297 #undef __FUNCT__ 3298 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3299 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3300 { 3301 PetscInt i,j; 3302 PetscScalar *alphas; 3303 PetscErrorCode ierr; 3304 3305 PetscFunctionBegin; 3306 /* this implements stabilized Gram-Schmidt */ 3307 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3308 for (i=0;i<n;i++) { 3309 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3310 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3311 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3312 } 3313 ierr = PetscFree(alphas);CHKERRQ(ierr); 3314 PetscFunctionReturn(0); 3315 } 3316 3317 #undef __FUNCT__ 3318 #define __FUNCT__ "MatISGetSubassemblingPattern" 3319 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3320 { 3321 IS ranks_send_to; 3322 PetscInt n_neighs,*neighs,*n_shared,**shared; 3323 PetscMPIInt size,rank,color; 3324 PetscInt *xadj,*adjncy; 3325 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3326 PetscInt i,local_size,threshold=0; 3327 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3328 PetscSubcomm subcomm; 3329 PetscErrorCode ierr; 3330 3331 PetscFunctionBegin; 3332 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3333 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3334 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3335 3336 /* Get info on mapping */ 3337 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3338 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3339 3340 /* build local CSR graph of subdomains' connectivity */ 3341 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3342 xadj[0] = 0; 3343 xadj[1] = PetscMax(n_neighs-1,0); 3344 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3345 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3346 3347 if (threshold) { 3348 PetscInt xadj_count = 0; 3349 for (i=1;i<n_neighs;i++) { 3350 if (n_shared[i] > threshold) { 3351 adjncy[xadj_count] = neighs[i]; 3352 adjncy_wgt[xadj_count] = n_shared[i]; 3353 xadj_count++; 3354 } 3355 } 3356 xadj[1] = xadj_count; 3357 } else { 3358 if (xadj[1]) { 3359 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3360 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3361 } 3362 } 3363 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3364 if (use_square) { 3365 for (i=0;i<xadj[1];i++) { 3366 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3367 } 3368 } 3369 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3370 3371 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3372 3373 /* 3374 Restrict work on active processes only. 3375 */ 3376 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3377 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3378 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3379 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3380 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3381 if (color) { 3382 ierr = PetscFree(xadj);CHKERRQ(ierr); 3383 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3384 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3385 } else { 3386 Mat subdomain_adj; 3387 IS new_ranks,new_ranks_contig; 3388 MatPartitioning partitioner; 3389 PetscInt prank,rstart=0,rend=0; 3390 PetscInt *is_indices,*oldranks; 3391 PetscBool aggregate; 3392 3393 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3394 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3395 prank = rank; 3396 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3397 /* 3398 for (i=0;i<size;i++) { 3399 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3400 } 3401 */ 3402 for (i=0;i<xadj[1];i++) { 3403 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3404 } 3405 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3406 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3407 if (aggregate) { 3408 PetscInt lrows,row,ncols,*cols; 3409 PetscMPIInt nrank; 3410 PetscScalar *vals; 3411 3412 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3413 lrows = 0; 3414 if (nrank<redprocs) { 3415 lrows = size/redprocs; 3416 if (nrank<size%redprocs) lrows++; 3417 } 3418 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3419 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3420 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3421 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3422 row = nrank; 3423 ncols = xadj[1]-xadj[0]; 3424 cols = adjncy; 3425 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3426 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3427 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3428 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3429 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3430 ierr = PetscFree(xadj);CHKERRQ(ierr); 3431 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3432 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3433 ierr = PetscFree(vals);CHKERRQ(ierr); 3434 } else { 3435 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3436 } 3437 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3438 3439 /* Partition */ 3440 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3441 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3442 if (use_vwgt) { 3443 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3444 v_wgt[0] = local_size; 3445 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3446 } 3447 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3448 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3449 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3450 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3451 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3452 3453 /* renumber new_ranks to avoid "holes" in new set of processors */ 3454 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3455 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3456 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3457 if (!redprocs) { 3458 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3459 } else { 3460 PetscInt idxs[1]; 3461 PetscMPIInt tag; 3462 MPI_Request *reqs; 3463 3464 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3465 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3466 for (i=rstart;i<rend;i++) { 3467 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3468 } 3469 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3470 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3471 ierr = PetscFree(reqs);CHKERRQ(ierr); 3472 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3473 } 3474 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3475 /* clean up */ 3476 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3477 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3478 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3479 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3480 } 3481 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3482 3483 /* assemble parallel IS for sends */ 3484 i = 1; 3485 if (color) i=0; 3486 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3487 /* get back IS */ 3488 *is_sends = ranks_send_to; 3489 PetscFunctionReturn(0); 3490 } 3491 3492 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3493 3494 #undef __FUNCT__ 3495 #define __FUNCT__ "MatISSubassemble" 3496 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3497 { 3498 Mat local_mat; 3499 IS is_sends_internal; 3500 PetscInt rows,cols,new_local_rows; 3501 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3502 PetscBool ismatis,isdense,newisdense,destroy_mat; 3503 ISLocalToGlobalMapping l2gmap; 3504 PetscInt* l2gmap_indices; 3505 const PetscInt* is_indices; 3506 MatType new_local_type; 3507 /* buffers */ 3508 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3509 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3510 PetscInt *recv_buffer_idxs_local; 3511 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3512 /* MPI */ 3513 MPI_Comm comm,comm_n; 3514 PetscSubcomm subcomm; 3515 PetscMPIInt n_sends,n_recvs,commsize; 3516 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3517 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3518 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3519 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3520 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3521 PetscErrorCode ierr; 3522 3523 PetscFunctionBegin; 3524 /* TODO: add missing checks */ 3525 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3526 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3527 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3528 PetscValidLogicalCollectiveInt(mat,nis,7); 3529 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3530 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3531 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3532 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3533 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3534 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3535 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3536 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3537 PetscInt mrows,mcols,mnrows,mncols; 3538 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3539 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3540 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3541 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3542 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3543 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3544 } 3545 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3546 PetscValidLogicalCollectiveInt(mat,bs,0); 3547 /* prepare IS for sending if not provided */ 3548 if (!is_sends) { 3549 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3550 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3551 } else { 3552 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3553 is_sends_internal = is_sends; 3554 } 3555 3556 /* get comm */ 3557 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3558 3559 /* compute number of sends */ 3560 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3561 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3562 3563 /* compute number of receives */ 3564 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3565 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3566 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3567 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3568 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3569 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3570 ierr = PetscFree(iflags);CHKERRQ(ierr); 3571 3572 /* restrict comm if requested */ 3573 subcomm = 0; 3574 destroy_mat = PETSC_FALSE; 3575 if (restrict_comm) { 3576 PetscMPIInt color,subcommsize; 3577 3578 color = 0; 3579 if (restrict_full) { 3580 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3581 } else { 3582 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3583 } 3584 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3585 subcommsize = commsize - subcommsize; 3586 /* check if reuse has been requested */ 3587 if (reuse == MAT_REUSE_MATRIX) { 3588 if (*mat_n) { 3589 PetscMPIInt subcommsize2; 3590 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3591 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3592 comm_n = PetscObjectComm((PetscObject)*mat_n); 3593 } else { 3594 comm_n = PETSC_COMM_SELF; 3595 } 3596 } else { /* MAT_INITIAL_MATRIX */ 3597 PetscMPIInt rank; 3598 3599 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3600 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3601 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3602 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3603 comm_n = PetscSubcommChild(subcomm); 3604 } 3605 /* flag to destroy *mat_n if not significative */ 3606 if (color) destroy_mat = PETSC_TRUE; 3607 } else { 3608 comm_n = comm; 3609 } 3610 3611 /* prepare send/receive buffers */ 3612 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3613 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3614 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3615 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3616 if (nis) { 3617 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3618 } 3619 3620 /* Get data from local matrices */ 3621 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3622 /* TODO: See below some guidelines on how to prepare the local buffers */ 3623 /* 3624 send_buffer_vals should contain the raw values of the local matrix 3625 send_buffer_idxs should contain: 3626 - MatType_PRIVATE type 3627 - PetscInt size_of_l2gmap 3628 - PetscInt global_row_indices[size_of_l2gmap] 3629 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3630 */ 3631 else { 3632 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3633 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3634 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3635 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3636 send_buffer_idxs[1] = i; 3637 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3638 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3639 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3640 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3641 for (i=0;i<n_sends;i++) { 3642 ilengths_vals[is_indices[i]] = len*len; 3643 ilengths_idxs[is_indices[i]] = len+2; 3644 } 3645 } 3646 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3647 /* additional is (if any) */ 3648 if (nis) { 3649 PetscMPIInt psum; 3650 PetscInt j; 3651 for (j=0,psum=0;j<nis;j++) { 3652 PetscInt plen; 3653 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3654 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3655 psum += len+1; /* indices + lenght */ 3656 } 3657 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3658 for (j=0,psum=0;j<nis;j++) { 3659 PetscInt plen; 3660 const PetscInt *is_array_idxs; 3661 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3662 send_buffer_idxs_is[psum] = plen; 3663 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3664 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3665 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3666 psum += plen+1; /* indices + lenght */ 3667 } 3668 for (i=0;i<n_sends;i++) { 3669 ilengths_idxs_is[is_indices[i]] = psum; 3670 } 3671 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3672 } 3673 3674 buf_size_idxs = 0; 3675 buf_size_vals = 0; 3676 buf_size_idxs_is = 0; 3677 for (i=0;i<n_recvs;i++) { 3678 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3679 buf_size_vals += (PetscInt)olengths_vals[i]; 3680 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3681 } 3682 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3683 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3684 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3685 3686 /* get new tags for clean communications */ 3687 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3688 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3689 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3690 3691 /* allocate for requests */ 3692 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3693 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3694 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3695 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3696 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3697 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3698 3699 /* communications */ 3700 ptr_idxs = recv_buffer_idxs; 3701 ptr_vals = recv_buffer_vals; 3702 ptr_idxs_is = recv_buffer_idxs_is; 3703 for (i=0;i<n_recvs;i++) { 3704 source_dest = onodes[i]; 3705 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3706 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3707 ptr_idxs += olengths_idxs[i]; 3708 ptr_vals += olengths_vals[i]; 3709 if (nis) { 3710 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3711 ptr_idxs_is += olengths_idxs_is[i]; 3712 } 3713 } 3714 for (i=0;i<n_sends;i++) { 3715 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3716 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3717 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3718 if (nis) { 3719 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3720 } 3721 } 3722 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3723 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3724 3725 /* assemble new l2g map */ 3726 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3727 ptr_idxs = recv_buffer_idxs; 3728 new_local_rows = 0; 3729 for (i=0;i<n_recvs;i++) { 3730 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3731 ptr_idxs += olengths_idxs[i]; 3732 } 3733 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3734 ptr_idxs = recv_buffer_idxs; 3735 new_local_rows = 0; 3736 for (i=0;i<n_recvs;i++) { 3737 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3738 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3739 ptr_idxs += olengths_idxs[i]; 3740 } 3741 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3742 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3743 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3744 3745 /* infer new local matrix type from received local matrices type */ 3746 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3747 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3748 if (n_recvs) { 3749 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3750 ptr_idxs = recv_buffer_idxs; 3751 for (i=0;i<n_recvs;i++) { 3752 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3753 new_local_type_private = MATAIJ_PRIVATE; 3754 break; 3755 } 3756 ptr_idxs += olengths_idxs[i]; 3757 } 3758 switch (new_local_type_private) { 3759 case MATDENSE_PRIVATE: 3760 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3761 new_local_type = MATSEQAIJ; 3762 bs = 1; 3763 } else { /* if I receive only 1 dense matrix */ 3764 new_local_type = MATSEQDENSE; 3765 bs = 1; 3766 } 3767 break; 3768 case MATAIJ_PRIVATE: 3769 new_local_type = MATSEQAIJ; 3770 bs = 1; 3771 break; 3772 case MATBAIJ_PRIVATE: 3773 new_local_type = MATSEQBAIJ; 3774 break; 3775 case MATSBAIJ_PRIVATE: 3776 new_local_type = MATSEQSBAIJ; 3777 break; 3778 default: 3779 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3780 break; 3781 } 3782 } else { /* by default, new_local_type is seqdense */ 3783 new_local_type = MATSEQDENSE; 3784 bs = 1; 3785 } 3786 3787 /* create MATIS object if needed */ 3788 if (reuse == MAT_INITIAL_MATRIX) { 3789 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3790 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3791 } else { 3792 /* it also destroys the local matrices */ 3793 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3794 } 3795 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3796 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3797 3798 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3799 3800 /* Global to local map of received indices */ 3801 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3802 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3803 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3804 3805 /* restore attributes -> type of incoming data and its size */ 3806 buf_size_idxs = 0; 3807 for (i=0;i<n_recvs;i++) { 3808 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3809 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3810 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3811 } 3812 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3813 3814 /* set preallocation */ 3815 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3816 if (!newisdense) { 3817 PetscInt *new_local_nnz=0; 3818 3819 ptr_vals = recv_buffer_vals; 3820 ptr_idxs = recv_buffer_idxs_local; 3821 if (n_recvs) { 3822 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3823 } 3824 for (i=0;i<n_recvs;i++) { 3825 PetscInt j; 3826 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3827 for (j=0;j<*(ptr_idxs+1);j++) { 3828 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3829 } 3830 } else { 3831 /* TODO */ 3832 } 3833 ptr_idxs += olengths_idxs[i]; 3834 } 3835 if (new_local_nnz) { 3836 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3837 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3838 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3839 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3840 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3841 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3842 } else { 3843 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3844 } 3845 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3846 } else { 3847 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3848 } 3849 3850 /* set values */ 3851 ptr_vals = recv_buffer_vals; 3852 ptr_idxs = recv_buffer_idxs_local; 3853 for (i=0;i<n_recvs;i++) { 3854 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3855 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3856 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3857 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3858 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3859 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3860 } else { 3861 /* TODO */ 3862 } 3863 ptr_idxs += olengths_idxs[i]; 3864 ptr_vals += olengths_vals[i]; 3865 } 3866 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3867 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3868 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3869 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3870 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3871 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3872 3873 #if 0 3874 if (!restrict_comm) { /* check */ 3875 Vec lvec,rvec; 3876 PetscReal infty_error; 3877 3878 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3879 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3880 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3881 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3882 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3883 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3884 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3885 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3886 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3887 } 3888 #endif 3889 3890 /* assemble new additional is (if any) */ 3891 if (nis) { 3892 PetscInt **temp_idxs,*count_is,j,psum; 3893 3894 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3895 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3896 ptr_idxs = recv_buffer_idxs_is; 3897 psum = 0; 3898 for (i=0;i<n_recvs;i++) { 3899 for (j=0;j<nis;j++) { 3900 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3901 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3902 psum += plen; 3903 ptr_idxs += plen+1; /* shift pointer to received data */ 3904 } 3905 } 3906 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3907 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3908 for (i=1;i<nis;i++) { 3909 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3910 } 3911 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3912 ptr_idxs = recv_buffer_idxs_is; 3913 for (i=0;i<n_recvs;i++) { 3914 for (j=0;j<nis;j++) { 3915 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3916 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3917 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3918 ptr_idxs += plen+1; /* shift pointer to received data */ 3919 } 3920 } 3921 for (i=0;i<nis;i++) { 3922 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3923 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3924 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3925 } 3926 ierr = PetscFree(count_is);CHKERRQ(ierr); 3927 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3928 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3929 } 3930 /* free workspace */ 3931 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3932 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3933 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3934 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3935 if (isdense) { 3936 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3937 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3938 } else { 3939 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3940 } 3941 if (nis) { 3942 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3943 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3944 } 3945 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3946 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3947 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3948 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3949 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3950 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3951 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3952 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3953 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3954 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3955 ierr = PetscFree(onodes);CHKERRQ(ierr); 3956 if (nis) { 3957 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3958 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3959 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3960 } 3961 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3962 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3963 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3964 for (i=0;i<nis;i++) { 3965 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3966 } 3967 *mat_n = NULL; 3968 } 3969 PetscFunctionReturn(0); 3970 } 3971 3972 /* temporary hack into ksp private data structure */ 3973 #include <petsc/private/kspimpl.h> 3974 3975 #undef __FUNCT__ 3976 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3977 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3978 { 3979 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3980 PC_IS *pcis = (PC_IS*)pc->data; 3981 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3982 MatNullSpace CoarseNullSpace=NULL; 3983 ISLocalToGlobalMapping coarse_islg; 3984 IS coarse_is,*isarray; 3985 PetscInt i,im_active=-1,active_procs=-1; 3986 PetscInt nis,nisdofs,nisneu; 3987 PC pc_temp; 3988 PCType coarse_pc_type; 3989 KSPType coarse_ksp_type; 3990 PetscBool multilevel_requested,multilevel_allowed; 3991 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3992 Mat t_coarse_mat_is; 3993 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3994 PetscMPIInt all_procs; 3995 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3996 PetscBool compute_vecs = PETSC_FALSE; 3997 PetscScalar *array; 3998 PetscErrorCode ierr; 3999 4000 PetscFunctionBegin; 4001 /* Assign global numbering to coarse dofs */ 4002 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4003 PetscInt ocoarse_size; 4004 compute_vecs = PETSC_TRUE; 4005 ocoarse_size = pcbddc->coarse_size; 4006 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4007 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4008 /* see if we can avoid some work */ 4009 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4010 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4011 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4012 PC pc; 4013 PetscBool isbddc; 4014 4015 /* temporary workaround since PCBDDC does not have a reset method so far */ 4016 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4017 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4018 if (isbddc) { 4019 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4020 } 4021 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4022 coarse_reuse = PETSC_FALSE; 4023 } else { /* we can safely reuse already computed coarse matrix */ 4024 coarse_reuse = PETSC_TRUE; 4025 } 4026 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4027 coarse_reuse = PETSC_FALSE; 4028 } 4029 /* reset any subassembling information */ 4030 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4031 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4032 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4033 coarse_reuse = PETSC_TRUE; 4034 } 4035 4036 /* count "active" (i.e. with positive local size) and "void" processes */ 4037 im_active = !!(pcis->n); 4038 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4039 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4040 void_procs = all_procs-active_procs; 4041 csin_type_simple = PETSC_TRUE; 4042 redist = PETSC_FALSE; 4043 if (pcbddc->current_level && void_procs) { 4044 csin_ml = PETSC_TRUE; 4045 ncoarse_ml = void_procs; 4046 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4047 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4048 csin_ds = PETSC_TRUE; 4049 ncoarse_ds = pcbddc->redistribute_coarse; 4050 redist = PETSC_TRUE; 4051 } else { 4052 csin_ds = PETSC_TRUE; 4053 ncoarse_ds = active_procs; 4054 redist = PETSC_TRUE; 4055 } 4056 } else { 4057 csin_ml = PETSC_FALSE; 4058 ncoarse_ml = all_procs; 4059 if (void_procs) { 4060 csin_ds = PETSC_TRUE; 4061 ncoarse_ds = void_procs; 4062 csin_type_simple = PETSC_FALSE; 4063 } else { 4064 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4065 csin_ds = PETSC_TRUE; 4066 ncoarse_ds = pcbddc->redistribute_coarse; 4067 redist = PETSC_TRUE; 4068 } else { 4069 csin_ds = PETSC_FALSE; 4070 ncoarse_ds = all_procs; 4071 } 4072 } 4073 } 4074 4075 /* 4076 test if we can go multilevel: three conditions must be satisfied: 4077 - we have not exceeded the number of levels requested 4078 - we can actually subassemble the active processes 4079 - we can find a suitable number of MPI processes where we can place the subassembled problem 4080 */ 4081 multilevel_allowed = PETSC_FALSE; 4082 multilevel_requested = PETSC_FALSE; 4083 if (pcbddc->current_level < pcbddc->max_levels) { 4084 multilevel_requested = PETSC_TRUE; 4085 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4086 multilevel_allowed = PETSC_FALSE; 4087 } else { 4088 multilevel_allowed = PETSC_TRUE; 4089 } 4090 } 4091 /* determine number of process partecipating to coarse solver */ 4092 if (multilevel_allowed) { 4093 ncoarse = ncoarse_ml; 4094 csin = csin_ml; 4095 redist = PETSC_FALSE; 4096 } else { 4097 ncoarse = ncoarse_ds; 4098 csin = csin_ds; 4099 } 4100 4101 /* creates temporary l2gmap and IS for coarse indexes */ 4102 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4103 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4104 4105 /* creates temporary MATIS object for coarse matrix */ 4106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4107 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4108 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4109 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4110 #if 0 4111 { 4112 PetscViewer viewer; 4113 char filename[256]; 4114 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4115 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4116 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4117 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4118 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4119 } 4120 #endif 4121 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4122 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4123 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4124 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4125 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4126 4127 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4128 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4129 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4130 const PetscInt *idxs; 4131 ISLocalToGlobalMapping tmap; 4132 4133 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4134 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4135 /* allocate space for temporary storage */ 4136 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4137 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4138 /* allocate for IS array */ 4139 nisdofs = pcbddc->n_ISForDofsLocal; 4140 nisneu = !!pcbddc->NeumannBoundariesLocal; 4141 nis = nisdofs + nisneu; 4142 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4143 /* dofs splitting */ 4144 for (i=0;i<nisdofs;i++) { 4145 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4146 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4147 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4148 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4149 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4150 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4151 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4152 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4153 } 4154 /* neumann boundaries */ 4155 if (pcbddc->NeumannBoundariesLocal) { 4156 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4157 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4158 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4159 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4160 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4161 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4162 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4163 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4164 } 4165 /* free memory */ 4166 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4167 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4168 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4169 } else { 4170 nis = 0; 4171 nisdofs = 0; 4172 nisneu = 0; 4173 isarray = NULL; 4174 } 4175 /* destroy no longer needed map */ 4176 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4177 4178 /* restrict on coarse candidates (if needed) */ 4179 coarse_mat_is = NULL; 4180 if (csin) { 4181 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4182 if (redist) { 4183 PetscMPIInt rank; 4184 PetscInt spc,n_spc_p1,dest[1],destsize; 4185 4186 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4187 spc = active_procs/ncoarse; 4188 n_spc_p1 = active_procs%ncoarse; 4189 if (im_active) { 4190 destsize = 1; 4191 if (rank > n_spc_p1*(spc+1)-1) { 4192 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4193 } else { 4194 dest[0] = rank/(spc+1); 4195 } 4196 } else { 4197 destsize = 0; 4198 } 4199 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4200 } else if (csin_type_simple) { 4201 PetscMPIInt rank; 4202 PetscInt issize,isidx; 4203 4204 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4205 if (im_active) { 4206 issize = 1; 4207 isidx = (PetscInt)rank; 4208 } else { 4209 issize = 0; 4210 isidx = -1; 4211 } 4212 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4213 } else { /* get a suitable subassembling pattern from MATIS code */ 4214 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4215 } 4216 4217 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4218 if (!redist || ncoarse <= void_procs) { 4219 PetscInt ncoarse_cand,tissize,*nisindices; 4220 PetscInt *coarse_candidates; 4221 const PetscInt* tisindices; 4222 4223 /* get coarse candidates' ranks in pc communicator */ 4224 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4225 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4226 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4227 if (!coarse_candidates[i]) { 4228 coarse_candidates[ncoarse_cand++]=i; 4229 } 4230 } 4231 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4232 4233 4234 if (pcbddc->dbg_flag) { 4235 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4236 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4237 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4238 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4239 for (i=0;i<ncoarse_cand;i++) { 4240 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4241 } 4242 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4243 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4244 } 4245 /* shift the pattern on coarse candidates */ 4246 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4247 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4248 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4249 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4250 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4251 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4252 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4253 } 4254 if (pcbddc->dbg_flag) { 4255 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4256 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4257 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4258 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4259 } 4260 } 4261 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4262 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4263 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4264 } else { /* this is the last level, so use just receiving processes in subcomm */ 4265 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4266 } 4267 } else { 4268 if (pcbddc->dbg_flag) { 4269 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4270 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4271 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4272 } 4273 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4274 coarse_mat_is = t_coarse_mat_is; 4275 } 4276 4277 /* create local to global scatters for coarse problem */ 4278 if (compute_vecs) { 4279 PetscInt lrows; 4280 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4281 if (coarse_mat_is) { 4282 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4283 } else { 4284 lrows = 0; 4285 } 4286 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4287 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4288 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4289 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4290 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4291 } 4292 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4293 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4294 4295 /* set defaults for coarse KSP and PC */ 4296 if (multilevel_allowed) { 4297 coarse_ksp_type = KSPRICHARDSON; 4298 coarse_pc_type = PCBDDC; 4299 } else { 4300 coarse_ksp_type = KSPPREONLY; 4301 coarse_pc_type = PCREDUNDANT; 4302 } 4303 4304 /* print some info if requested */ 4305 if (pcbddc->dbg_flag) { 4306 if (!multilevel_allowed) { 4307 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4308 if (multilevel_requested) { 4309 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4310 } else if (pcbddc->max_levels) { 4311 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4312 } 4313 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4314 } 4315 } 4316 4317 /* create the coarse KSP object only once with defaults */ 4318 if (coarse_mat_is) { 4319 MatReuse coarse_mat_reuse; 4320 PetscViewer dbg_viewer = NULL; 4321 if (pcbddc->dbg_flag) { 4322 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4323 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4324 } 4325 if (!pcbddc->coarse_ksp) { 4326 char prefix[256],str_level[16]; 4327 size_t len; 4328 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4329 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4330 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4331 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4332 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4333 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4334 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4335 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4336 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4337 /* prefix */ 4338 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4339 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4340 if (!pcbddc->current_level) { 4341 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4342 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4343 } else { 4344 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4345 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4346 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4347 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4348 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4349 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4350 } 4351 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4352 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4353 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4354 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4355 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4356 /* allow user customization */ 4357 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4358 } 4359 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4360 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4361 if (nisdofs) { 4362 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4363 for (i=0;i<nisdofs;i++) { 4364 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4365 } 4366 } 4367 if (nisneu) { 4368 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4369 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4370 } 4371 4372 /* get some info after set from options */ 4373 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4374 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4375 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4376 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4377 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4378 isbddc = PETSC_FALSE; 4379 } 4380 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4381 if (isredundant) { 4382 KSP inner_ksp; 4383 PC inner_pc; 4384 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4385 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4386 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4387 } 4388 4389 /* assemble coarse matrix */ 4390 if (coarse_reuse) { 4391 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4392 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4393 coarse_mat_reuse = MAT_REUSE_MATRIX; 4394 } else { 4395 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4396 } 4397 if (isbddc || isnn) { 4398 if (pcbddc->coarsening_ratio > 1) { 4399 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4400 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4401 if (pcbddc->dbg_flag) { 4402 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4403 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4404 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4405 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4406 } 4407 } 4408 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4409 } else { 4410 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4411 coarse_mat = coarse_mat_is; 4412 } 4413 } else { 4414 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4415 } 4416 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4417 4418 /* propagate symmetry info of coarse matrix */ 4419 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4420 if (pc->pmat->symmetric_set) { 4421 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4422 } 4423 if (pc->pmat->hermitian_set) { 4424 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4425 } 4426 if (pc->pmat->spd_set) { 4427 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4428 } 4429 /* set operators */ 4430 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4431 if (pcbddc->dbg_flag) { 4432 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4433 } 4434 } else { /* processes non partecipating to coarse solver (if any) */ 4435 coarse_mat = 0; 4436 } 4437 ierr = PetscFree(isarray);CHKERRQ(ierr); 4438 #if 0 4439 { 4440 PetscViewer viewer; 4441 char filename[256]; 4442 sprintf(filename,"coarse_mat.m"); 4443 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4444 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4445 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4446 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4447 } 4448 #endif 4449 4450 /* Compute coarse null space (special handling by BDDC only) */ 4451 #if 0 4452 if (pcbddc->NullSpace) { 4453 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4454 } 4455 #endif 4456 4457 if (pcbddc->coarse_ksp) { 4458 Vec crhs,csol; 4459 PetscBool ispreonly; 4460 4461 if (CoarseNullSpace) { 4462 if (isbddc) { 4463 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4464 } else { 4465 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4466 } 4467 } 4468 /* setup coarse ksp */ 4469 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4470 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4471 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4472 /* hack */ 4473 if (!csol) { 4474 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4475 } 4476 if (!crhs) { 4477 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4478 } 4479 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4480 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4481 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4482 KSP check_ksp; 4483 KSPType check_ksp_type; 4484 PC check_pc; 4485 Vec check_vec,coarse_vec; 4486 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4487 PetscInt its; 4488 PetscBool compute_eigs; 4489 PetscReal *eigs_r,*eigs_c; 4490 PetscInt neigs; 4491 const char *prefix; 4492 4493 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4494 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4495 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4496 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4497 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4498 if (ispreonly) { 4499 check_ksp_type = KSPPREONLY; 4500 compute_eigs = PETSC_FALSE; 4501 } else { 4502 check_ksp_type = KSPGMRES; 4503 compute_eigs = PETSC_TRUE; 4504 } 4505 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4506 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4507 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4508 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4509 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4510 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4511 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4512 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4513 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4514 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4515 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4516 /* create random vec */ 4517 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4518 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4519 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4520 if (CoarseNullSpace) { 4521 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4522 } 4523 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4524 /* solve coarse problem */ 4525 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4526 if (CoarseNullSpace) { 4527 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4528 } 4529 /* set eigenvalue estimation if preonly has not been requested */ 4530 if (compute_eigs) { 4531 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4532 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4533 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4534 lambda_max = eigs_r[neigs-1]; 4535 lambda_min = eigs_r[0]; 4536 if (pcbddc->use_coarse_estimates) { 4537 if (lambda_max>lambda_min) { 4538 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4539 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4540 } 4541 } 4542 } 4543 4544 /* check coarse problem residual error */ 4545 if (pcbddc->dbg_flag) { 4546 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4547 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4548 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4549 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4550 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4551 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4552 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4553 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4554 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4555 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4556 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4557 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4558 if (compute_eigs) { 4559 PetscReal lambda_max_s,lambda_min_s; 4560 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4561 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4562 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4563 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4564 for (i=0;i<neigs;i++) { 4565 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4566 } 4567 } 4568 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4569 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4570 } 4571 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4572 if (compute_eigs) { 4573 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4574 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4575 } 4576 } 4577 } 4578 /* print additional info */ 4579 if (pcbddc->dbg_flag) { 4580 /* waits until all processes reaches this point */ 4581 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4582 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4583 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4584 } 4585 4586 /* free memory */ 4587 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4588 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4589 PetscFunctionReturn(0); 4590 } 4591 4592 #undef __FUNCT__ 4593 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4594 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4595 { 4596 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4597 PC_IS* pcis = (PC_IS*)pc->data; 4598 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4599 IS subset,subset_mult,subset_n; 4600 PetscInt local_size,coarse_size=0; 4601 PetscInt *local_primal_indices=NULL; 4602 const PetscInt *t_local_primal_indices; 4603 PetscErrorCode ierr; 4604 4605 PetscFunctionBegin; 4606 /* Compute global number of coarse dofs */ 4607 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4608 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4609 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4610 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4611 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4612 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4613 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4614 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4615 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4616 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 4617 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4618 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4619 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4620 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4621 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4622 4623 /* check numbering */ 4624 if (pcbddc->dbg_flag) { 4625 PetscScalar coarsesum,*array; 4626 PetscInt i; 4627 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4628 4629 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4630 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4631 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4632 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4633 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4634 for (i=0;i<pcbddc->local_primal_size;i++) { 4635 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4636 } 4637 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4638 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4639 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4640 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4641 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4642 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4643 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4644 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4645 for (i=0;i<pcis->n;i++) { 4646 if (array[i] == 1.0) { 4647 set_error = PETSC_TRUE; 4648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4649 } 4650 } 4651 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4652 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4653 for (i=0;i<pcis->n;i++) { 4654 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4655 } 4656 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4657 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4658 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4659 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4660 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4661 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4662 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4663 PetscInt *gidxs; 4664 4665 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4666 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4667 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4668 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4669 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4670 for (i=0;i<pcbddc->local_primal_size;i++) { 4671 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4672 } 4673 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4674 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4675 } 4676 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4677 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4678 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4679 } 4680 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4681 /* get back data */ 4682 *coarse_size_n = coarse_size; 4683 *local_primal_indices_n = local_primal_indices; 4684 PetscFunctionReturn(0); 4685 } 4686 4687 #undef __FUNCT__ 4688 #define __FUNCT__ "PCBDDCGlobalToLocal" 4689 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4690 { 4691 IS localis_t; 4692 PetscInt i,lsize,*idxs,n; 4693 PetscScalar *vals; 4694 PetscErrorCode ierr; 4695 4696 PetscFunctionBegin; 4697 /* get indices in local ordering exploiting local to global map */ 4698 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4699 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4700 for (i=0;i<lsize;i++) vals[i] = 1.0; 4701 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4702 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4703 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4704 if (idxs) { /* multilevel guard */ 4705 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4706 } 4707 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4708 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4709 ierr = PetscFree(vals);CHKERRQ(ierr); 4710 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4711 /* now compute set in local ordering */ 4712 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4713 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4714 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4715 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4716 for (i=0,lsize=0;i<n;i++) { 4717 if (PetscRealPart(vals[i]) > 0.5) { 4718 lsize++; 4719 } 4720 } 4721 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4722 for (i=0,lsize=0;i<n;i++) { 4723 if (PetscRealPart(vals[i]) > 0.5) { 4724 idxs[lsize++] = i; 4725 } 4726 } 4727 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4728 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4729 *localis = localis_t; 4730 PetscFunctionReturn(0); 4731 } 4732 4733 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4734 #undef __FUNCT__ 4735 #define __FUNCT__ "PCBDDCMatMult_Private" 4736 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4737 { 4738 PCBDDCChange_ctx change_ctx; 4739 PetscErrorCode ierr; 4740 4741 PetscFunctionBegin; 4742 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4743 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4744 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4745 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4746 PetscFunctionReturn(0); 4747 } 4748 4749 #undef __FUNCT__ 4750 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4751 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4752 { 4753 PCBDDCChange_ctx change_ctx; 4754 PetscErrorCode ierr; 4755 4756 PetscFunctionBegin; 4757 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4758 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4759 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4760 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4761 PetscFunctionReturn(0); 4762 } 4763 4764 #undef __FUNCT__ 4765 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4766 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4767 { 4768 PC_IS *pcis=(PC_IS*)pc->data; 4769 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4770 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4771 Mat S_j; 4772 PetscInt *used_xadj,*used_adjncy; 4773 PetscBool free_used_adj; 4774 PetscErrorCode ierr; 4775 4776 PetscFunctionBegin; 4777 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4778 free_used_adj = PETSC_FALSE; 4779 if (pcbddc->sub_schurs_layers == -1) { 4780 used_xadj = NULL; 4781 used_adjncy = NULL; 4782 } else { 4783 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4784 used_xadj = pcbddc->mat_graph->xadj; 4785 used_adjncy = pcbddc->mat_graph->adjncy; 4786 } else if (pcbddc->computed_rowadj) { 4787 used_xadj = pcbddc->mat_graph->xadj; 4788 used_adjncy = pcbddc->mat_graph->adjncy; 4789 } else { 4790 PetscBool flg_row=PETSC_FALSE; 4791 const PetscInt *xadj,*adjncy; 4792 PetscInt nvtxs; 4793 4794 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4795 if (flg_row) { 4796 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4797 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4798 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4799 free_used_adj = PETSC_TRUE; 4800 } else { 4801 pcbddc->sub_schurs_layers = -1; 4802 used_xadj = NULL; 4803 used_adjncy = NULL; 4804 } 4805 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4806 } 4807 } 4808 4809 /* setup sub_schurs data */ 4810 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4811 if (!sub_schurs->use_mumps) { 4812 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4813 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4814 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4815 } else { 4816 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4817 PetscBool isseqaij; 4818 if (!pcbddc->use_vertices && reuse_solvers) { 4819 PetscInt n_vertices; 4820 4821 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4822 reuse_solvers = (PetscBool)!n_vertices; 4823 } 4824 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4825 if (!isseqaij) { 4826 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4827 if (matis->A == pcbddc->local_mat) { 4828 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4829 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4830 } else { 4831 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4832 } 4833 } 4834 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4835 } 4836 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4837 4838 /* free adjacency */ 4839 if (free_used_adj) { 4840 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4841 } 4842 PetscFunctionReturn(0); 4843 } 4844 4845 #undef __FUNCT__ 4846 #define __FUNCT__ "PCBDDCInitSubSchurs" 4847 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4848 { 4849 PC_IS *pcis=(PC_IS*)pc->data; 4850 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4851 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4852 PCBDDCGraph graph; 4853 PetscErrorCode ierr; 4854 4855 PetscFunctionBegin; 4856 /* attach interface graph for determining subsets */ 4857 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4858 IS verticesIS,verticescomm; 4859 PetscInt vsize,*idxs; 4860 4861 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4862 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4863 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4864 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4865 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4866 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4867 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4868 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4869 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4870 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4871 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4872 /* 4873 if (pcbddc->dbg_flag) { 4874 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4875 } 4876 */ 4877 } else { 4878 graph = pcbddc->mat_graph; 4879 } 4880 4881 /* sub_schurs init */ 4882 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4883 4884 /* free graph struct */ 4885 if (pcbddc->sub_schurs_rebuild) { 4886 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4887 } 4888 PetscFunctionReturn(0); 4889 } 4890