1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 29 30 if (pcbddc->dbg_flag) { 31 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 32 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 33 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 35 } 36 37 if (pcbddc->dbg_flag) { 38 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 39 } 40 41 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 42 43 /* max size of subsets */ 44 mss = 0; 45 for (i=0;i<sub_schurs->n_subs;i++) { 46 PetscInt subset_size; 47 48 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 49 mss = PetscMax(mss,subset_size); 50 } 51 52 /* min/max and threshold */ 53 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 54 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 55 nmax = PetscMax(nmin,nmax); 56 allocated_S_St = PETSC_FALSE; 57 if (nmin) { 58 allocated_S_St = PETSC_TRUE; 59 } 60 61 /* allocate lapack workspace */ 62 cum = cum2 = 0; 63 maxneigs = 0; 64 for (i=0;i<sub_schurs->n_subs;i++) { 65 PetscInt n,subset_size; 66 67 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 68 n = PetscMin(subset_size,nmax); 69 cum += subset_size; 70 cum2 += subset_size*n; 71 maxneigs = PetscMax(maxneigs,n); 72 } 73 if (mss) { 74 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 75 PetscBLASInt B_itype = 1; 76 PetscBLASInt B_N = mss; 77 PetscReal zero = 0.0; 78 PetscReal eps = 0.0; /* dlamch? */ 79 80 B_lwork = -1; 81 S = NULL; 82 St = NULL; 83 eigs = NULL; 84 eigv = NULL; 85 B_iwork = NULL; 86 B_ifail = NULL; 87 #if defined(PETSC_USE_COMPLEX) 88 rwork = NULL; 89 #endif 90 thresh = 1.0; 91 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 92 #if defined(PETSC_USE_COMPLEX) 93 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 94 #else 95 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 96 #endif 97 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 98 ierr = PetscFPTrapPop();CHKERRQ(ierr); 99 } else { 100 /* TODO */ 101 } 102 } else { 103 lwork = 0; 104 } 105 106 nv = 0; 107 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 108 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 109 } 110 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 111 if (allocated_S_St) { 112 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 113 } 114 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 115 #if defined(PETSC_USE_COMPLEX) 116 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 117 #endif 118 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 119 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 121 nv+cum,&pcbddc->adaptive_constraints_idxs, 122 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 123 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 124 125 maxneigs = 0; 126 cum = cum2 = cumarray = 0; 127 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 128 pcbddc->adaptive_constraints_data_ptr[0] = 0; 129 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 130 const PetscInt *idxs; 131 132 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 133 for (cum=0;cum<nv;cum++) { 134 pcbddc->adaptive_constraints_n[cum] = 1; 135 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 136 pcbddc->adaptive_constraints_data[cum] = 1.0; 137 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 138 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 139 } 140 cum2 = cum; 141 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 142 } 143 144 if (mss) { /* multilevel */ 145 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 146 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 147 } 148 149 for (i=0;i<sub_schurs->n_subs;i++) { 150 151 const PetscInt *idxs; 152 PetscReal infty = PETSC_MAX_REAL; 153 PetscInt j,subset_size,eigs_start = 0; 154 PetscBLASInt B_N; 155 PetscBool same_data = PETSC_FALSE; 156 157 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 158 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 159 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 160 if (sub_schurs->is_hermitian) { 161 PetscInt j,k; 162 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 163 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 } 166 for (j=0;j<subset_size;j++) { 167 for (k=j;k<subset_size;k++) { 168 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 169 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 170 } 171 } 172 } else { 173 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 174 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 } 176 } else { 177 S = Sarray + cumarray; 178 St = Starray + cumarray; 179 } 180 181 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 182 /* see if we can save some work */ 183 if (sub_schurs->n_subs == 1) { 184 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 185 } 186 187 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 188 B_neigs = 0; 189 } else { 190 /* Threshold: this is an heuristic for edges */ 191 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 192 193 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 194 PetscBLASInt B_itype = 1; 195 PetscBLASInt B_IL, B_IU; 196 PetscReal eps = -1.0; /* dlamch? */ 197 PetscInt nmin_s; 198 199 /* ask for eigenvalues larger than thresh */ 200 if (pcbddc->dbg_flag) { 201 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 202 } 203 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 204 #if defined(PETSC_USE_COMPLEX) 205 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 206 #else 207 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 208 #endif 209 ierr = PetscFPTrapPop();CHKERRQ(ierr); 210 if (B_ierr) { 211 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 212 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 213 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 214 } 215 216 if (B_neigs > nmax) { 217 if (pcbddc->dbg_flag) { 218 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 219 } 220 eigs_start = B_neigs -nmax; 221 B_neigs = nmax; 222 } 223 224 nmin_s = PetscMin(nmin,B_N); 225 if (B_neigs < nmin_s) { 226 PetscBLASInt B_neigs2; 227 228 B_IU = B_N - B_neigs; 229 B_IL = B_N - nmin_s + 1; 230 if (pcbddc->dbg_flag) { 231 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 232 } 233 if (sub_schurs->is_hermitian) { 234 PetscInt j; 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 for (j=0;j<subset_size;j++) { 239 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 240 } 241 } else { 242 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 244 } 245 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 246 #if defined(PETSC_USE_COMPLEX) 247 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 248 #else 249 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 250 #endif 251 ierr = PetscFPTrapPop();CHKERRQ(ierr); 252 B_neigs += B_neigs2; 253 } 254 if (B_ierr) { 255 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 256 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 257 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 if (pcbddc->dbg_flag) { 260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 261 for (j=0;j<B_neigs;j++) { 262 if (eigs[j] == 0.0) { 263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 264 } else { 265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 266 } 267 } 268 } 269 } else { 270 /* TODO */ 271 } 272 } 273 maxneigs = PetscMax(B_neigs,maxneigs); 274 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 275 if (B_neigs) { 276 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 277 278 if (pcbddc->dbg_flag > 1) { 279 PetscInt ii; 280 for (ii=0;ii<B_neigs;ii++) { 281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 282 for (j=0;j<B_N;j++) { 283 #if defined(PETSC_USE_COMPLEX) 284 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 287 #else 288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 289 #endif 290 } 291 } 292 } 293 #if 0 294 for (j=0;j<B_neigs;j++) { 295 PetscBLASInt Blas_N,Blas_one = 1.0; 296 PetscScalar norm; 297 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 298 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 299 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 300 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 301 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 302 } else { 303 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 304 } 305 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 306 } 307 #endif 308 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 309 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 310 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 311 cum++; 312 } 313 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 314 /* shift for next computation */ 315 cumarray += subset_size*subset_size; 316 } 317 if (pcbddc->dbg_flag) { 318 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 319 } 320 321 if (mss) { 322 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 323 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 324 /* destroy matrices (junk) */ 325 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 326 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 327 } 328 if (allocated_S_St) { 329 ierr = PetscFree2(S,St);CHKERRQ(ierr); 330 } 331 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 332 #if defined(PETSC_USE_COMPLEX) 333 ierr = PetscFree(rwork);CHKERRQ(ierr); 334 #endif 335 if (pcbddc->dbg_flag) { 336 PetscInt maxneigs_r; 337 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 338 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 339 } 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "PCBDDCSetUpSolvers" 345 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 346 { 347 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 348 PetscScalar *coarse_submat_vals; 349 PetscErrorCode ierr; 350 351 PetscFunctionBegin; 352 /* Setup local scatters R_to_B and (optionally) R_to_D */ 353 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 354 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 355 356 /* Setup local neumann solver ksp_R */ 357 /* PCBDDCSetUpLocalScatters should be called first! */ 358 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 359 360 /* Change global null space passed in by the user if change of basis has been requested */ 361 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 362 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 363 } 364 365 /* 366 Setup local correction and local part of coarse basis. 367 Gives back the dense local part of the coarse matrix in column major ordering 368 */ 369 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 370 371 /* Compute total number of coarse nodes and setup coarse solver */ 372 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 373 374 /* free */ 375 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 376 PetscFunctionReturn(0); 377 } 378 379 #undef __FUNCT__ 380 #define __FUNCT__ "PCBDDCResetCustomization" 381 PetscErrorCode PCBDDCResetCustomization(PC pc) 382 { 383 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 384 PetscErrorCode ierr; 385 386 PetscFunctionBegin; 387 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 390 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 392 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 393 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 394 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 395 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 396 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 397 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 #undef __FUNCT__ 402 #define __FUNCT__ "PCBDDCResetTopography" 403 PetscErrorCode PCBDDCResetTopography(PC pc) 404 { 405 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 406 PetscErrorCode ierr; 407 408 PetscFunctionBegin; 409 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 411 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 412 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 413 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "PCBDDCResetSolvers" 419 PetscErrorCode PCBDDCResetSolvers(PC pc) 420 { 421 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 422 PetscScalar *array; 423 PetscErrorCode ierr; 424 425 PetscFunctionBegin; 426 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 427 if (pcbddc->coarse_phi_B) { 428 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 429 ierr = PetscFree(array);CHKERRQ(ierr); 430 } 431 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 432 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 434 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 436 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 437 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 438 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 440 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 441 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 442 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 443 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 444 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 445 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 446 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 447 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 448 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 449 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 450 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 451 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 452 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 459 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 460 { 461 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 462 PC_IS *pcis = (PC_IS*)pc->data; 463 VecType impVecType; 464 PetscInt n_constraints,n_R,old_size; 465 PetscErrorCode ierr; 466 467 PetscFunctionBegin; 468 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 469 /* get sizes */ 470 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 471 n_R = pcis->n-pcbddc->n_vertices; 472 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 473 /* local work vectors (try to avoid unneeded work)*/ 474 /* R nodes */ 475 old_size = -1; 476 if (pcbddc->vec1_R) { 477 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 478 } 479 if (n_R != old_size) { 480 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 481 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 482 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 483 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 484 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 485 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 486 } 487 /* local primal dofs */ 488 old_size = -1; 489 if (pcbddc->vec1_P) { 490 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 491 } 492 if (pcbddc->local_primal_size != old_size) { 493 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 494 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 495 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 496 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 497 } 498 /* local explicit constraints */ 499 old_size = -1; 500 if (pcbddc->vec1_C) { 501 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 502 } 503 if (n_constraints && n_constraints != old_size) { 504 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 505 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 506 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 507 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 508 } 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "PCBDDCSetUpCorrection" 514 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 515 { 516 PetscErrorCode ierr; 517 /* pointers to pcis and pcbddc */ 518 PC_IS* pcis = (PC_IS*)pc->data; 519 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 520 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 521 /* submatrices of local problem */ 522 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 523 /* submatrices of local coarse problem */ 524 Mat S_VV,S_CV,S_VC,S_CC; 525 /* working matrices */ 526 Mat C_CR; 527 /* additional working stuff */ 528 PC pc_R; 529 Mat F; 530 PetscBool isLU,isCHOL,isILU; 531 532 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 533 PetscScalar *work; 534 PetscInt *idx_V_B; 535 PetscInt n,n_vertices,n_constraints; 536 PetscInt i,n_R,n_D,n_B; 537 PetscBool unsymmetric_check; 538 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 539 MatType impMatType; 540 /* some shortcuts to scalars */ 541 PetscScalar one=1.0,m_one=-1.0; 542 543 PetscFunctionBegin; 544 n_vertices = pcbddc->n_vertices; 545 n_constraints = pcbddc->local_primal_size-n_vertices; 546 /* Set Non-overlapping dimensions */ 547 n_B = pcis->n_B; 548 n_D = pcis->n - n_B; 549 n_R = pcis->n - n_vertices; 550 551 /* Set types for local objects needed by BDDC precondtioner */ 552 impMatType = MATSEQDENSE; 553 554 /* vertices in boundary numbering */ 555 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 556 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 557 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 558 559 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 560 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 564 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 566 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 568 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 569 570 unsymmetric_check = PETSC_FALSE; 571 /* allocate workspace */ 572 n = 0; 573 if (n_constraints) { 574 n += n_R*n_constraints; 575 } 576 if (n_vertices) { 577 n = PetscMax(2*n_R*n_vertices,n); 578 n = PetscMax((n_R+n_B)*n_vertices,n); 579 } 580 if (!pcbddc->symmetric_primal) { 581 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 582 unsymmetric_check = PETSC_TRUE; 583 } 584 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 585 586 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 587 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 590 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 591 if (isLU || isILU || isCHOL) { 592 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 593 } else if (sub_schurs->reuse_mumps) { 594 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 595 MatFactorType type; 596 597 F = reuse_mumps->F; 598 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 599 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 600 } else { 601 F = NULL; 602 } 603 604 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 605 if (n_constraints) { 606 Mat M1,M2,M3; 607 Mat auxmat; 608 IS is_aux; 609 PetscScalar *array,*array2; 610 611 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 612 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 613 614 /* Extract constraints on R nodes: C_{CR} */ 615 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 616 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 617 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 618 619 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 620 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 621 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 622 for (i=0;i<n_constraints;i++) { 623 const PetscScalar *row_cmat_values; 624 const PetscInt *row_cmat_indices; 625 PetscInt size_of_constraint,j; 626 627 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 628 for (j=0;j<size_of_constraint;j++) { 629 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 630 } 631 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 632 } 633 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 634 if (F) { 635 Mat B; 636 637 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 638 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 639 ierr = MatDestroy(&B);CHKERRQ(ierr); 640 } else { 641 PetscScalar *marr; 642 643 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 644 for (i=0;i<n_constraints;i++) { 645 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 646 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 647 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 648 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 649 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 650 } 651 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 652 } 653 if (!pcbddc->switch_static) { 654 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 655 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 656 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 660 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 661 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 662 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 663 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 664 } 665 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 666 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 667 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 668 } else { 669 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 670 pcbddc->local_auxmat2 = local_auxmat2_R; 671 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 672 } 673 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 674 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 675 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 676 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 677 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 678 if (isCHOL) { 679 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 680 } else { 681 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 682 } 683 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 684 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 685 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 686 ierr = MatDestroy(&M2);CHKERRQ(ierr); 687 ierr = MatDestroy(&M3);CHKERRQ(ierr); 688 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 689 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 690 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 691 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 692 ierr = MatDestroy(&M1);CHKERRQ(ierr); 693 } 694 /* Get submatrices from subdomain matrix */ 695 if (n_vertices) { 696 IS is_aux; 697 698 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 699 IS tis; 700 701 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 702 ierr = ISSort(tis);CHKERRQ(ierr); 703 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 704 ierr = ISDestroy(&tis);CHKERRQ(ierr); 705 } else { 706 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 707 } 708 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 709 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 710 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 711 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 712 } 713 714 /* Matrix of coarse basis functions (local) */ 715 if (pcbddc->coarse_phi_B) { 716 PetscInt on_B,on_primal,on_D=n_D; 717 if (pcbddc->coarse_phi_D) { 718 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 719 } 720 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 721 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 722 PetscScalar *marray; 723 724 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 725 ierr = PetscFree(marray);CHKERRQ(ierr); 726 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 727 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 728 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 729 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 730 } 731 } 732 733 if (!pcbddc->coarse_phi_B) { 734 PetscScalar *marray; 735 736 n = n_B*pcbddc->local_primal_size; 737 if (pcbddc->switch_static || pcbddc->dbg_flag) { 738 n += n_D*pcbddc->local_primal_size; 739 } 740 if (!pcbddc->symmetric_primal) { 741 n *= 2; 742 } 743 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 745 n = n_B*pcbddc->local_primal_size; 746 if (pcbddc->switch_static || pcbddc->dbg_flag) { 747 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 748 n += n_D*pcbddc->local_primal_size; 749 } 750 if (!pcbddc->symmetric_primal) { 751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 752 if (pcbddc->switch_static || pcbddc->dbg_flag) { 753 n = n_B*pcbddc->local_primal_size; 754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 755 } 756 } else { 757 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 758 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 760 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 761 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 762 } 763 } 764 } 765 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 766 /* vertices */ 767 if (n_vertices) { 768 769 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 770 771 if (n_R) { 772 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 773 PetscBLASInt B_N,B_one = 1; 774 PetscScalar *x,*y; 775 PetscBool isseqaij; 776 777 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 778 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 780 if (F) { /* TODO could be optimized for symmetric problems */ 781 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 782 } else { 783 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 784 for (i=0;i<n_vertices;i++) { 785 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 786 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 787 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 788 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 789 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 790 } 791 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 792 } 793 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 794 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 795 if (n_constraints) { 796 Mat B; 797 798 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 799 for (i=0;i<n_vertices;i++) { 800 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 801 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 802 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 803 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 804 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 805 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 806 } 807 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 808 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 809 ierr = MatDestroy(&B);CHKERRQ(ierr); 810 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 811 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 812 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 813 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 814 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 815 ierr = MatDestroy(&B);CHKERRQ(ierr); 816 } 817 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 818 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 819 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 820 } 821 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 822 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 823 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 824 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 825 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 826 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 827 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 828 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 829 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 830 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 831 } else { 832 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 833 } 834 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 835 /* coarse basis functions */ 836 for (i=0;i<n_vertices;i++) { 837 PetscScalar *y; 838 839 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 840 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 841 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 842 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 y[n_B*i+idx_V_B[i]] = 1.0; 845 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 846 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 847 848 if (pcbddc->switch_static || pcbddc->dbg_flag) { 849 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 850 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 851 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 852 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 853 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 854 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 855 } 856 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 857 } 858 /* if n_R == 0 the object is not destroyed */ 859 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 860 } 861 862 if (n_constraints) { 863 Mat B; 864 865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 866 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 867 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 868 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 869 if (n_vertices) { 870 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 871 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 872 } else { 873 Mat S_VCt; 874 875 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 876 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 877 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 878 } 879 } 880 ierr = MatDestroy(&B);CHKERRQ(ierr); 881 /* coarse basis functions */ 882 for (i=0;i<n_constraints;i++) { 883 PetscScalar *y; 884 885 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 886 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 887 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 888 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 891 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 892 if (pcbddc->switch_static || pcbddc->dbg_flag) { 893 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 894 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 895 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 898 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 899 } 900 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 901 } 902 } 903 if (n_constraints) { 904 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 905 } 906 907 /* compute other basis functions for non-symmetric problems */ 908 if (!pcbddc->symmetric_primal) { 909 910 if (n_constraints) { 911 Mat S_CCT,B_C; 912 913 /* this is a lazy thing */ 914 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 915 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 916 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 917 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 918 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 919 if (n_vertices) { 920 Mat B_V,S_VCT; 921 922 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 923 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 924 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 925 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 926 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 927 } 928 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 929 } else { /* if there are no constraints, reset work */ 930 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 931 } 932 if (n_vertices && n_R) { 933 Mat A_VRT; 934 PetscScalar *marray; 935 PetscBLASInt B_N,B_one = 1; 936 937 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 938 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 939 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 940 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 941 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 942 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 943 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 944 } 945 946 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 947 for (i=0;i<pcbddc->local_primal_size;i++) { 948 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 949 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 950 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 951 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 952 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 953 } 954 } else { 955 for (i=0;i<pcbddc->local_primal_size;i++) { 956 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 957 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 958 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 959 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 960 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 961 } 962 } 963 /* coarse basis functions */ 964 for (i=0;i<pcbddc->local_primal_size;i++) { 965 PetscScalar *y; 966 967 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 968 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 969 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 970 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 971 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 972 if (i<n_vertices) { 973 y[n_B*i+idx_V_B[i]] = 1.0; 974 } 975 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 976 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 977 978 if (pcbddc->switch_static || pcbddc->dbg_flag) { 979 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 980 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 981 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 982 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 984 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 985 } 986 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 987 } 988 } 989 /* free memory */ 990 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 991 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 992 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 993 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 994 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 995 ierr = PetscFree(work);CHKERRQ(ierr); 996 if (n_vertices) { 997 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 998 } 999 if (n_constraints) { 1000 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1001 } 1002 /* Checking coarse_sub_mat and coarse basis functios */ 1003 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1004 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1005 if (pcbddc->dbg_flag) { 1006 Mat coarse_sub_mat; 1007 Mat AUXMAT,TM1,TM2,TM3,TM4; 1008 Mat coarse_phi_D,coarse_phi_B; 1009 Mat coarse_psi_D,coarse_psi_B; 1010 Mat A_II,A_BB,A_IB,A_BI; 1011 Mat C_B,CPHI; 1012 IS is_dummy; 1013 Vec mones; 1014 MatType checkmattype=MATSEQAIJ; 1015 PetscReal real_value; 1016 1017 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1018 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1019 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1020 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1021 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1022 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1023 if (unsymmetric_check) { 1024 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1025 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1026 } 1027 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1028 1029 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1030 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1032 if (unsymmetric_check) { 1033 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1034 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1035 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1036 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1037 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1038 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1039 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1040 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1041 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1042 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1043 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1044 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1045 } else { 1046 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1047 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1048 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1049 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1050 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1051 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1053 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1054 } 1055 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1056 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1057 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1058 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1059 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1060 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1061 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1062 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1063 1064 /* check constraints */ 1065 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1066 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1067 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1068 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1069 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1070 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1071 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1073 if (unsymmetric_check) { 1074 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1075 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1076 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1077 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1079 } 1080 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1081 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1082 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1083 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1084 1085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1086 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1087 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1088 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1089 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1090 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1091 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1092 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1093 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1094 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1095 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1096 if (unsymmetric_check) { 1097 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1098 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1099 } 1100 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1101 } 1102 /* get back data */ 1103 *coarse_submat_vals_n = coarse_submat_vals; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1109 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1110 { 1111 Mat *work_mat; 1112 IS isrow_s,iscol_s; 1113 PetscBool rsorted,csorted; 1114 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1115 PetscErrorCode ierr; 1116 1117 PetscFunctionBegin; 1118 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1119 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1120 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1121 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1122 1123 if (!rsorted) { 1124 const PetscInt *idxs; 1125 PetscInt *idxs_sorted,i; 1126 1127 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1128 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1129 for (i=0;i<rsize;i++) { 1130 idxs_perm_r[i] = i; 1131 } 1132 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1133 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1134 for (i=0;i<rsize;i++) { 1135 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1136 } 1137 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1138 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1139 } else { 1140 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1141 isrow_s = isrow; 1142 } 1143 1144 if (!csorted) { 1145 if (isrow == iscol) { 1146 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1147 iscol_s = isrow_s; 1148 } else { 1149 const PetscInt *idxs; 1150 PetscInt *idxs_sorted,i; 1151 1152 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1153 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1154 for (i=0;i<csize;i++) { 1155 idxs_perm_c[i] = i; 1156 } 1157 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1158 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1159 for (i=0;i<csize;i++) { 1160 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1161 } 1162 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1163 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1164 } 1165 } else { 1166 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1167 iscol_s = iscol; 1168 } 1169 1170 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1171 1172 if (!rsorted || !csorted) { 1173 Mat new_mat; 1174 IS is_perm_r,is_perm_c; 1175 1176 if (!rsorted) { 1177 PetscInt *idxs_r,i; 1178 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1179 for (i=0;i<rsize;i++) { 1180 idxs_r[idxs_perm_r[i]] = i; 1181 } 1182 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1183 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1184 } else { 1185 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1186 } 1187 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1188 1189 if (!csorted) { 1190 if (isrow_s == iscol_s) { 1191 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1192 is_perm_c = is_perm_r; 1193 } else { 1194 PetscInt *idxs_c,i; 1195 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1196 for (i=0;i<csize;i++) { 1197 idxs_c[idxs_perm_c[i]] = i; 1198 } 1199 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1200 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1201 } 1202 } else { 1203 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1204 } 1205 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1206 1207 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1208 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1209 work_mat[0] = new_mat; 1210 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1211 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1212 } 1213 1214 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1215 *B = work_mat[0]; 1216 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1217 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1218 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1224 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1225 { 1226 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1227 PC_IS* pcis = (PC_IS*)pc->data; 1228 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1229 Mat new_mat; 1230 IS is_local,is_global; 1231 PetscInt local_size; 1232 PetscBool isseqaij; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1237 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1238 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1239 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1240 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1241 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1242 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1243 1244 /* check */ 1245 if (pcbddc->dbg_flag) { 1246 Vec x,x_change; 1247 PetscReal error; 1248 1249 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1250 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1251 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1252 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1253 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1254 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1255 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1256 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1257 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1258 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1259 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1260 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1261 ierr = VecDestroy(&x);CHKERRQ(ierr); 1262 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1263 } 1264 1265 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1266 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1267 if (isseqaij) { 1268 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1269 } else { 1270 Mat work_mat; 1271 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1272 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1273 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1274 } 1275 if (matis->A->symmetric_set) { 1276 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1277 #if !defined(PETSC_USE_COMPLEX) 1278 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1279 #endif 1280 } 1281 /* 1282 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1283 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1284 */ 1285 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1291 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1292 { 1293 PC_IS* pcis = (PC_IS*)(pc->data); 1294 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1295 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1296 PetscInt *idx_R_local=NULL; 1297 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1298 PetscInt vbs,bs; 1299 PetscBT bitmask=NULL; 1300 PetscErrorCode ierr; 1301 1302 PetscFunctionBegin; 1303 /* 1304 No need to setup local scatters if 1305 - primal space is unchanged 1306 AND 1307 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1308 AND 1309 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1310 */ 1311 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1312 PetscFunctionReturn(0); 1313 } 1314 /* destroy old objects */ 1315 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1316 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1317 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1318 /* Set Non-overlapping dimensions */ 1319 n_B = pcis->n_B; 1320 n_D = pcis->n - n_B; 1321 n_vertices = pcbddc->n_vertices; 1322 1323 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1324 1325 /* create auxiliary bitmask and allocate workspace */ 1326 if (!sub_schurs->reuse_mumps) { 1327 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1328 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1329 for (i=0;i<n_vertices;i++) { 1330 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1331 } 1332 1333 for (i=0, n_R=0; i<pcis->n; i++) { 1334 if (!PetscBTLookup(bitmask,i)) { 1335 idx_R_local[n_R++] = i; 1336 } 1337 } 1338 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1339 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1340 1341 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1342 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1343 } 1344 1345 /* Block code */ 1346 vbs = 1; 1347 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1348 if (bs>1 && !(n_vertices%bs)) { 1349 PetscBool is_blocked = PETSC_TRUE; 1350 PetscInt *vary; 1351 if (!sub_schurs->reuse_mumps) { 1352 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1353 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1354 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1355 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1356 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1357 for (i=0; i<pcis->n/bs; i++) { 1358 if (vary[i]!=0 && vary[i]!=bs) { 1359 is_blocked = PETSC_FALSE; 1360 break; 1361 } 1362 } 1363 ierr = PetscFree(vary);CHKERRQ(ierr); 1364 } else { 1365 /* Verify directly the R set */ 1366 for (i=0; i<n_R/bs; i++) { 1367 PetscInt j,node=idx_R_local[bs*i]; 1368 for (j=1; j<bs; j++) { 1369 if (node != idx_R_local[bs*i+j]-j) { 1370 is_blocked = PETSC_FALSE; 1371 break; 1372 } 1373 } 1374 } 1375 } 1376 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1377 vbs = bs; 1378 for (i=0;i<n_R/vbs;i++) { 1379 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1380 } 1381 } 1382 } 1383 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1384 if (sub_schurs->reuse_mumps) { 1385 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1386 1387 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1388 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1389 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1390 reuse_mumps->is_R = pcbddc->is_R_local; 1391 } else { 1392 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1393 } 1394 1395 /* print some info if requested */ 1396 if (pcbddc->dbg_flag) { 1397 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1398 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1399 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1400 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1401 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1402 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1403 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1404 } 1405 1406 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1407 if (!sub_schurs->reuse_mumps) { 1408 IS is_aux1,is_aux2; 1409 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1410 1411 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1412 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1413 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1414 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1415 for (i=0; i<n_D; i++) { 1416 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1417 } 1418 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1419 for (i=0, j=0; i<n_R; i++) { 1420 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1421 aux_array1[j++] = i; 1422 } 1423 } 1424 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1425 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1426 for (i=0, j=0; i<n_B; i++) { 1427 if (!PetscBTLookup(bitmask,is_indices[i])) { 1428 aux_array2[j++] = i; 1429 } 1430 } 1431 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1432 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1433 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1434 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1435 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1436 1437 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1438 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1439 for (i=0, j=0; i<n_R; i++) { 1440 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1441 aux_array1[j++] = i; 1442 } 1443 } 1444 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1445 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1446 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1447 } 1448 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1449 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1450 } else { 1451 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1452 IS tis; 1453 PetscInt schur_size; 1454 1455 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1456 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1457 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1458 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1459 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1460 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1461 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1462 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1463 } 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1471 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1472 { 1473 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1474 PC_IS *pcis = (PC_IS*)pc->data; 1475 PC pc_temp; 1476 Mat A_RR; 1477 MatReuse reuse; 1478 PetscScalar m_one = -1.0; 1479 PetscReal value; 1480 PetscInt n_D,n_R; 1481 PetscBool use_exact,use_exact_reduced,issbaij; 1482 PetscErrorCode ierr; 1483 /* prefixes stuff */ 1484 char dir_prefix[256],neu_prefix[256],str_level[16]; 1485 size_t len; 1486 1487 PetscFunctionBegin; 1488 1489 /* compute prefixes */ 1490 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1491 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1492 if (!pcbddc->current_level) { 1493 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1494 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1495 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1496 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1497 } else { 1498 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1499 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1500 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1501 len -= 15; /* remove "pc_bddc_coarse_" */ 1502 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1503 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1504 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1505 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1506 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1507 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1508 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1509 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1510 } 1511 1512 /* DIRICHLET PROBLEM */ 1513 if (dirichlet) { 1514 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1515 if (pcbddc->local_mat->symmetric_set) { 1516 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1517 } 1518 /* Matrix for Dirichlet problem is pcis->A_II */ 1519 n_D = pcis->n - pcis->n_B; 1520 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1521 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1522 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1523 /* default */ 1524 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1525 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1526 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1527 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1528 if (issbaij) { 1529 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1530 } else { 1531 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1532 } 1533 /* Allow user's customization */ 1534 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1535 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1536 } 1537 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1538 if (sub_schurs->reuse_mumps) { 1539 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1540 1541 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1542 } 1543 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1544 if (!n_D) { 1545 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1546 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1547 } 1548 /* Set Up KSP for Dirichlet problem of BDDC */ 1549 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1550 /* set ksp_D into pcis data */ 1551 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1552 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1553 pcis->ksp_D = pcbddc->ksp_D; 1554 } 1555 1556 /* NEUMANN PROBLEM */ 1557 A_RR = 0; 1558 if (neumann) { 1559 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1560 PetscInt ibs,mbs; 1561 PetscBool issbaij; 1562 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1563 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1564 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1565 if (pcbddc->ksp_R) { /* already created ksp */ 1566 PetscInt nn_R; 1567 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1568 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1569 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1570 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1571 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1572 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1573 reuse = MAT_INITIAL_MATRIX; 1574 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1575 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1576 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1577 reuse = MAT_INITIAL_MATRIX; 1578 } else { /* safe to reuse the matrix */ 1579 reuse = MAT_REUSE_MATRIX; 1580 } 1581 } 1582 /* last check */ 1583 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1584 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1585 reuse = MAT_INITIAL_MATRIX; 1586 } 1587 } else { /* first time, so we need to create the matrix */ 1588 reuse = MAT_INITIAL_MATRIX; 1589 } 1590 /* extract A_RR */ 1591 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1592 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1594 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1595 if (matis->A == pcbddc->local_mat) { 1596 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1597 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1598 } else { 1599 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1600 } 1601 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1602 if (matis->A == pcbddc->local_mat) { 1603 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1604 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1605 } else { 1606 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1607 } 1608 } 1609 if (!sub_schurs->reuse_mumps) { 1610 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1611 if (pcbddc->local_mat->symmetric_set) { 1612 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1613 } 1614 } else { 1615 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1616 1617 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1618 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1619 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1620 } 1621 if (!pcbddc->ksp_R) { /* create object if not present */ 1622 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1623 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1624 /* default */ 1625 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1626 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1627 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1628 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1629 if (issbaij) { 1630 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1631 } else { 1632 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1633 } 1634 /* Allow user's customization */ 1635 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1636 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1637 } 1638 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1639 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1640 if (!n_R) { 1641 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1642 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1643 } 1644 /* Reuse MUMPS solver if it is present */ 1645 if (sub_schurs->reuse_mumps) { 1646 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1647 1648 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1649 } 1650 /* Set Up KSP for Neumann problem of BDDC */ 1651 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1652 } 1653 /* free Neumann problem's matrix */ 1654 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1655 1656 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1657 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1658 if (pcbddc->dbg_flag) { 1659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1660 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1661 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1662 } 1663 if (dirichlet) { /* Dirichlet */ 1664 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1665 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1666 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1667 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1668 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1669 /* need to be adapted? */ 1670 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1671 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1672 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1673 /* print info */ 1674 if (pcbddc->dbg_flag) { 1675 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1676 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1677 } 1678 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1679 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1680 } 1681 } 1682 if (neumann) { /* Neumann */ 1683 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1684 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1685 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1686 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1687 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1688 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1689 /* need to be adapted? */ 1690 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1691 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1692 /* print info */ 1693 if (pcbddc->dbg_flag) { 1694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1695 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1696 } 1697 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1698 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1699 } 1700 } 1701 } 1702 PetscFunctionReturn(0); 1703 } 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1707 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1708 { 1709 PetscErrorCode ierr; 1710 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1711 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1712 1713 PetscFunctionBegin; 1714 if (!sub_schurs->reuse_mumps) { 1715 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1716 } 1717 if (!pcbddc->switch_static) { 1718 if (applytranspose && pcbddc->local_auxmat1) { 1719 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1720 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1721 } 1722 if (!sub_schurs->reuse_mumps) { 1723 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1724 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1725 } else { 1726 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1727 1728 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1729 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1730 } 1731 } else { 1732 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1733 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1734 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1735 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1736 if (applytranspose && pcbddc->local_auxmat1) { 1737 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1738 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1739 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1740 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1741 } 1742 } 1743 if (!sub_schurs->reuse_mumps) { 1744 if (applytranspose) { 1745 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1746 } else { 1747 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1748 } 1749 #if defined(PETSC_HAVE_MUMPS) 1750 } else { 1751 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1752 1753 if (applytranspose) { 1754 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1755 } else { 1756 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1757 } 1758 #endif 1759 } 1760 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1761 if (!pcbddc->switch_static) { 1762 if (!sub_schurs->reuse_mumps) { 1763 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1764 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1765 } else { 1766 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1767 1768 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1769 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1770 } 1771 if (!applytranspose && pcbddc->local_auxmat1) { 1772 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1773 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1774 } 1775 } else { 1776 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1778 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 if (!applytranspose && pcbddc->local_auxmat1) { 1781 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1782 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1783 } 1784 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1785 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1786 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1787 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1788 } 1789 PetscFunctionReturn(0); 1790 } 1791 1792 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1793 #undef __FUNCT__ 1794 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1795 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1796 { 1797 PetscErrorCode ierr; 1798 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1799 PC_IS* pcis = (PC_IS*) (pc->data); 1800 const PetscScalar zero = 0.0; 1801 1802 PetscFunctionBegin; 1803 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1804 if (applytranspose) { 1805 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1806 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1807 } else { 1808 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1809 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1810 } 1811 /* start communications from local primal nodes to rhs of coarse solver */ 1812 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1813 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1814 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1815 1816 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1817 /* TODO remove null space when doing multilevel */ 1818 if (pcbddc->coarse_ksp) { 1819 Vec rhs,sol; 1820 1821 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1822 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1823 if (applytranspose) { 1824 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1825 } else { 1826 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1827 } 1828 } 1829 1830 /* Local solution on R nodes */ 1831 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1832 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1833 } 1834 1835 /* communications from coarse sol to local primal nodes */ 1836 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1837 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1838 1839 /* Sum contributions from two levels */ 1840 if (applytranspose) { 1841 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1842 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1843 } else { 1844 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1845 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1846 } 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1852 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1853 { 1854 PetscErrorCode ierr; 1855 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1856 PetscScalar *array; 1857 Vec from,to; 1858 1859 PetscFunctionBegin; 1860 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1861 from = pcbddc->coarse_vec; 1862 to = pcbddc->vec1_P; 1863 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1864 Vec tvec; 1865 1866 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1867 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1868 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1869 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1870 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1871 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1872 } 1873 } else { /* from local to global -> put data in coarse right hand side */ 1874 from = pcbddc->vec1_P; 1875 to = pcbddc->coarse_vec; 1876 } 1877 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1878 PetscFunctionReturn(0); 1879 } 1880 1881 #undef __FUNCT__ 1882 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1883 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1884 { 1885 PetscErrorCode ierr; 1886 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1887 PetscScalar *array; 1888 Vec from,to; 1889 1890 PetscFunctionBegin; 1891 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1892 from = pcbddc->coarse_vec; 1893 to = pcbddc->vec1_P; 1894 } else { /* from local to global -> put data in coarse right hand side */ 1895 from = pcbddc->vec1_P; 1896 to = pcbddc->coarse_vec; 1897 } 1898 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1899 if (smode == SCATTER_FORWARD) { 1900 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1901 Vec tvec; 1902 1903 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1904 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1905 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1906 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1907 } 1908 } else { 1909 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1910 ierr = VecResetArray(from);CHKERRQ(ierr); 1911 } 1912 } 1913 PetscFunctionReturn(0); 1914 } 1915 1916 /* uncomment for testing purposes */ 1917 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1920 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1921 { 1922 PetscErrorCode ierr; 1923 PC_IS* pcis = (PC_IS*)(pc->data); 1924 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1925 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1926 /* one and zero */ 1927 PetscScalar one=1.0,zero=0.0; 1928 /* space to store constraints and their local indices */ 1929 PetscScalar *constraints_data; 1930 PetscInt *constraints_idxs,*constraints_idxs_B; 1931 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1932 PetscInt *constraints_n; 1933 /* iterators */ 1934 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1935 /* BLAS integers */ 1936 PetscBLASInt lwork,lierr; 1937 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1938 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1939 /* reuse */ 1940 PetscInt olocal_primal_size,olocal_primal_size_cc; 1941 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1942 /* change of basis */ 1943 PetscBool qr_needed; 1944 PetscBT change_basis,qr_needed_idx; 1945 /* auxiliary stuff */ 1946 PetscInt *nnz,*is_indices; 1947 PetscInt ncc; 1948 /* some quantities */ 1949 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1950 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1951 1952 PetscFunctionBegin; 1953 /* Destroy Mat objects computed previously */ 1954 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1955 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1956 /* save info on constraints from previous setup (if any) */ 1957 olocal_primal_size = pcbddc->local_primal_size; 1958 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1959 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1960 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1961 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1962 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1963 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1964 1965 /* print some info */ 1966 if (pcbddc->dbg_flag) { 1967 IS vertices; 1968 PetscInt nv,nedges,nfaces; 1969 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1970 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1971 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1972 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1973 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1974 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1975 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1976 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1977 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1978 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1979 } 1980 1981 if (!pcbddc->adaptive_selection) { 1982 IS ISForVertices,*ISForFaces,*ISForEdges; 1983 MatNullSpace nearnullsp; 1984 const Vec *nearnullvecs; 1985 Vec *localnearnullsp; 1986 PetscScalar *array; 1987 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1988 PetscBool nnsp_has_cnst; 1989 /* LAPACK working arrays for SVD or POD */ 1990 PetscBool skip_lapack,boolforchange; 1991 PetscScalar *work; 1992 PetscReal *singular_vals; 1993 #if defined(PETSC_USE_COMPLEX) 1994 PetscReal *rwork; 1995 #endif 1996 #if defined(PETSC_MISSING_LAPACK_GESVD) 1997 PetscScalar *temp_basis,*correlation_mat; 1998 #else 1999 PetscBLASInt dummy_int=1; 2000 PetscScalar dummy_scalar=1.; 2001 #endif 2002 2003 /* Get index sets for faces, edges and vertices from graph */ 2004 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2005 /* free unneeded index sets */ 2006 if (!pcbddc->use_vertices) { 2007 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2008 } 2009 if (!pcbddc->use_edges) { 2010 for (i=0;i<n_ISForEdges;i++) { 2011 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2012 } 2013 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2014 n_ISForEdges = 0; 2015 } 2016 if (!pcbddc->use_faces) { 2017 for (i=0;i<n_ISForFaces;i++) { 2018 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2019 } 2020 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2021 n_ISForFaces = 0; 2022 } 2023 2024 #if defined(PETSC_USE_DEBUG) 2025 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2026 Also use_change_of_basis should be consistent among processors */ 2027 if (pcbddc->NullSpace) { 2028 PetscBool tbool[2],gbool[2]; 2029 2030 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2031 pcbddc->use_change_of_basis = PETSC_TRUE; 2032 if (!ISForEdges) { 2033 pcbddc->use_change_on_faces = PETSC_TRUE; 2034 } 2035 } 2036 tbool[0] = pcbddc->use_change_of_basis; 2037 tbool[1] = pcbddc->use_change_on_faces; 2038 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2039 pcbddc->use_change_of_basis = gbool[0]; 2040 pcbddc->use_change_on_faces = gbool[1]; 2041 } 2042 #endif 2043 2044 /* check if near null space is attached to global mat */ 2045 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2046 if (nearnullsp) { 2047 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2048 /* remove any stored info */ 2049 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2050 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2051 /* store information for BDDC solver reuse */ 2052 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2053 pcbddc->onearnullspace = nearnullsp; 2054 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2055 for (i=0;i<nnsp_size;i++) { 2056 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2057 } 2058 } else { /* if near null space is not provided BDDC uses constants by default */ 2059 nnsp_size = 0; 2060 nnsp_has_cnst = PETSC_TRUE; 2061 } 2062 /* get max number of constraints on a single cc */ 2063 max_constraints = nnsp_size; 2064 if (nnsp_has_cnst) max_constraints++; 2065 2066 /* 2067 Evaluate maximum storage size needed by the procedure 2068 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2069 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2070 There can be multiple constraints per connected component 2071 */ 2072 n_vertices = 0; 2073 if (ISForVertices) { 2074 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2075 } 2076 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2077 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2078 2079 total_counts = n_ISForFaces+n_ISForEdges; 2080 total_counts *= max_constraints; 2081 total_counts += n_vertices; 2082 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2083 2084 total_counts = 0; 2085 max_size_of_constraint = 0; 2086 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2087 IS used_is; 2088 if (i<n_ISForEdges) { 2089 used_is = ISForEdges[i]; 2090 } else { 2091 used_is = ISForFaces[i-n_ISForEdges]; 2092 } 2093 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2094 total_counts += j; 2095 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2096 } 2097 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2098 2099 /* get local part of global near null space vectors */ 2100 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2101 for (k=0;k<nnsp_size;k++) { 2102 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2103 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2104 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2105 } 2106 2107 /* whether or not to skip lapack calls */ 2108 skip_lapack = PETSC_TRUE; 2109 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2110 2111 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2112 if (!skip_lapack) { 2113 PetscScalar temp_work; 2114 2115 #if defined(PETSC_MISSING_LAPACK_GESVD) 2116 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2117 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2118 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2119 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2120 #if defined(PETSC_USE_COMPLEX) 2121 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2122 #endif 2123 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2124 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2125 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2126 lwork = -1; 2127 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2128 #if !defined(PETSC_USE_COMPLEX) 2129 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2130 #else 2131 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2132 #endif 2133 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2134 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2135 #else /* on missing GESVD */ 2136 /* SVD */ 2137 PetscInt max_n,min_n; 2138 max_n = max_size_of_constraint; 2139 min_n = max_constraints; 2140 if (max_size_of_constraint < max_constraints) { 2141 min_n = max_size_of_constraint; 2142 max_n = max_constraints; 2143 } 2144 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2145 #if defined(PETSC_USE_COMPLEX) 2146 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2147 #endif 2148 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2149 lwork = -1; 2150 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2151 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2152 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2153 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2154 #if !defined(PETSC_USE_COMPLEX) 2155 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2156 #else 2157 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2158 #endif 2159 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2160 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2161 #endif /* on missing GESVD */ 2162 /* Allocate optimal workspace */ 2163 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2164 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2165 } 2166 /* Now we can loop on constraining sets */ 2167 total_counts = 0; 2168 constraints_idxs_ptr[0] = 0; 2169 constraints_data_ptr[0] = 0; 2170 /* vertices */ 2171 if (n_vertices) { 2172 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2173 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2174 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2175 for (i=0;i<n_vertices;i++) { 2176 constraints_n[total_counts] = 1; 2177 constraints_data[total_counts] = 1.0; 2178 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2179 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2180 total_counts++; 2181 } 2182 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2183 PetscBool used_vertex; 2184 for (i=0;i<n_vertices;i++) { 2185 used_vertex = PETSC_FALSE; 2186 k = 0; 2187 while (!used_vertex && k<nnsp_size) { 2188 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2189 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2190 constraints_n[total_counts] = 1; 2191 constraints_idxs[total_counts] = is_indices[i]; 2192 constraints_data[total_counts] = 1.0; 2193 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2194 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2195 total_counts++; 2196 used_vertex = PETSC_TRUE; 2197 } 2198 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2199 k++; 2200 } 2201 } 2202 } 2203 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2204 n_vertices = total_counts; 2205 } 2206 2207 /* edges and faces */ 2208 total_counts_cc = total_counts; 2209 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2210 IS used_is; 2211 PetscBool idxs_copied = PETSC_FALSE; 2212 2213 if (ncc<n_ISForEdges) { 2214 used_is = ISForEdges[ncc]; 2215 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2216 } else { 2217 used_is = ISForFaces[ncc-n_ISForEdges]; 2218 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2219 } 2220 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2221 2222 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2223 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2224 /* change of basis should not be performed on local periodic nodes */ 2225 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2226 if (nnsp_has_cnst) { 2227 PetscScalar quad_value; 2228 2229 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2230 idxs_copied = PETSC_TRUE; 2231 2232 if (!pcbddc->use_nnsp_true) { 2233 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2234 } else { 2235 quad_value = 1.0; 2236 } 2237 for (j=0;j<size_of_constraint;j++) { 2238 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2239 } 2240 temp_constraints++; 2241 total_counts++; 2242 } 2243 for (k=0;k<nnsp_size;k++) { 2244 PetscReal real_value; 2245 PetscScalar *ptr_to_data; 2246 2247 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2248 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2249 for (j=0;j<size_of_constraint;j++) { 2250 ptr_to_data[j] = array[is_indices[j]]; 2251 } 2252 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2253 /* check if array is null on the connected component */ 2254 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2255 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2256 if (real_value > 0.0) { /* keep indices and values */ 2257 temp_constraints++; 2258 total_counts++; 2259 if (!idxs_copied) { 2260 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2261 idxs_copied = PETSC_TRUE; 2262 } 2263 } 2264 } 2265 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2266 valid_constraints = temp_constraints; 2267 if (!pcbddc->use_nnsp_true && temp_constraints) { 2268 if (temp_constraints == 1) { /* just normalize the constraint */ 2269 PetscScalar norm,*ptr_to_data; 2270 2271 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2272 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2273 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2274 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2275 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2276 } else { /* perform SVD */ 2277 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2278 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2279 2280 #if defined(PETSC_MISSING_LAPACK_GESVD) 2281 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2282 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2283 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2284 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2285 from that computed using LAPACKgesvd 2286 -> This is due to a different computation of eigenvectors in LAPACKheev 2287 -> The quality of the POD-computed basis will be the same */ 2288 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2289 /* Store upper triangular part of correlation matrix */ 2290 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2291 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2292 for (j=0;j<temp_constraints;j++) { 2293 for (k=0;k<j+1;k++) { 2294 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2295 } 2296 } 2297 /* compute eigenvalues and eigenvectors of correlation matrix */ 2298 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2299 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2300 #if !defined(PETSC_USE_COMPLEX) 2301 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2302 #else 2303 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2304 #endif 2305 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2306 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2307 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2308 j = 0; 2309 while (j < temp_constraints && singular_vals[j] < tol) j++; 2310 total_counts = total_counts-j; 2311 valid_constraints = temp_constraints-j; 2312 /* scale and copy POD basis into used quadrature memory */ 2313 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2314 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2315 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2316 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2317 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2318 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2319 if (j<temp_constraints) { 2320 PetscInt ii; 2321 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2322 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2323 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2324 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2325 for (k=0;k<temp_constraints-j;k++) { 2326 for (ii=0;ii<size_of_constraint;ii++) { 2327 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2328 } 2329 } 2330 } 2331 #else /* on missing GESVD */ 2332 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2333 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2334 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2335 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2336 #if !defined(PETSC_USE_COMPLEX) 2337 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2338 #else 2339 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2340 #endif 2341 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2342 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2343 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2344 k = temp_constraints; 2345 if (k > size_of_constraint) k = size_of_constraint; 2346 j = 0; 2347 while (j < k && singular_vals[k-j-1] < tol) j++; 2348 valid_constraints = k-j; 2349 total_counts = total_counts-temp_constraints+valid_constraints; 2350 #endif /* on missing GESVD */ 2351 } 2352 } 2353 /* update pointers information */ 2354 if (valid_constraints) { 2355 constraints_n[total_counts_cc] = valid_constraints; 2356 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2357 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2358 /* set change_of_basis flag */ 2359 if (boolforchange) { 2360 PetscBTSet(change_basis,total_counts_cc); 2361 } 2362 total_counts_cc++; 2363 } 2364 } 2365 /* free workspace */ 2366 if (!skip_lapack) { 2367 ierr = PetscFree(work);CHKERRQ(ierr); 2368 #if defined(PETSC_USE_COMPLEX) 2369 ierr = PetscFree(rwork);CHKERRQ(ierr); 2370 #endif 2371 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2372 #if defined(PETSC_MISSING_LAPACK_GESVD) 2373 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2374 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2375 #endif 2376 } 2377 for (k=0;k<nnsp_size;k++) { 2378 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2379 } 2380 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2381 /* free index sets of faces, edges and vertices */ 2382 for (i=0;i<n_ISForFaces;i++) { 2383 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2384 } 2385 if (n_ISForFaces) { 2386 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2387 } 2388 for (i=0;i<n_ISForEdges;i++) { 2389 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2390 } 2391 if (n_ISForEdges) { 2392 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2393 } 2394 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2395 } else { 2396 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2397 2398 total_counts = 0; 2399 n_vertices = 0; 2400 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2401 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2402 } 2403 max_constraints = 0; 2404 total_counts_cc = 0; 2405 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2406 total_counts += pcbddc->adaptive_constraints_n[i]; 2407 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2408 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2409 } 2410 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2411 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2412 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2413 constraints_data = pcbddc->adaptive_constraints_data; 2414 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2415 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2416 total_counts_cc = 0; 2417 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2418 if (pcbddc->adaptive_constraints_n[i]) { 2419 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2420 } 2421 } 2422 #if 0 2423 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2424 for (i=0;i<total_counts_cc;i++) { 2425 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2426 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2427 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2428 printf(" %d",constraints_idxs[j]); 2429 } 2430 printf("\n"); 2431 printf("number of cc: %d\n",constraints_n[i]); 2432 } 2433 for (i=0;i<n_vertices;i++) { 2434 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2435 } 2436 for (i=0;i<sub_schurs->n_subs;i++) { 2437 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2438 } 2439 #endif 2440 2441 max_size_of_constraint = 0; 2442 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2443 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2444 /* Change of basis */ 2445 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2446 if (pcbddc->use_change_of_basis) { 2447 for (i=0;i<sub_schurs->n_subs;i++) { 2448 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2449 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2450 } 2451 } 2452 } 2453 } 2454 pcbddc->local_primal_size = total_counts; 2455 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2456 2457 /* map constraints_idxs in boundary numbering */ 2458 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2459 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 2460 2461 /* Create constraint matrix */ 2462 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2463 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2464 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2465 2466 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2467 /* determine if a QR strategy is needed for change of basis */ 2468 qr_needed = PETSC_FALSE; 2469 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2470 total_primal_vertices=0; 2471 pcbddc->local_primal_size_cc = 0; 2472 for (i=0;i<total_counts_cc;i++) { 2473 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2474 if (size_of_constraint == 1) { 2475 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2476 pcbddc->local_primal_size_cc += 1; 2477 } else if (PetscBTLookup(change_basis,i)) { 2478 for (k=0;k<constraints_n[i];k++) { 2479 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2480 } 2481 pcbddc->local_primal_size_cc += constraints_n[i]; 2482 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2483 PetscBTSet(qr_needed_idx,i); 2484 qr_needed = PETSC_TRUE; 2485 } 2486 } else { 2487 pcbddc->local_primal_size_cc += 1; 2488 } 2489 } 2490 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2491 pcbddc->n_vertices = total_primal_vertices; 2492 /* permute indices in order to have a sorted set of vertices */ 2493 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2494 2495 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2496 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2497 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2498 2499 /* nonzero structure of constraint matrix */ 2500 /* and get reference dof for local constraints */ 2501 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2502 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2503 2504 j = total_primal_vertices; 2505 total_counts = total_primal_vertices; 2506 cum = total_primal_vertices; 2507 for (i=n_vertices;i<total_counts_cc;i++) { 2508 if (!PetscBTLookup(change_basis,i)) { 2509 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2510 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2511 cum++; 2512 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2513 for (k=0;k<constraints_n[i];k++) { 2514 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2515 nnz[j+k] = size_of_constraint; 2516 } 2517 j += constraints_n[i]; 2518 } 2519 } 2520 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2521 ierr = PetscFree(nnz);CHKERRQ(ierr); 2522 2523 /* set values in constraint matrix */ 2524 for (i=0;i<total_primal_vertices;i++) { 2525 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2526 } 2527 total_counts = total_primal_vertices; 2528 for (i=n_vertices;i<total_counts_cc;i++) { 2529 if (!PetscBTLookup(change_basis,i)) { 2530 PetscInt *cols; 2531 2532 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2533 cols = constraints_idxs+constraints_idxs_ptr[i]; 2534 for (k=0;k<constraints_n[i];k++) { 2535 PetscInt row = total_counts+k; 2536 PetscScalar *vals; 2537 2538 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2539 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2540 } 2541 total_counts += constraints_n[i]; 2542 } 2543 } 2544 /* assembling */ 2545 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2546 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2547 2548 /* 2549 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2550 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2551 */ 2552 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2553 if (pcbddc->use_change_of_basis) { 2554 /* dual and primal dofs on a single cc */ 2555 PetscInt dual_dofs,primal_dofs; 2556 /* working stuff for GEQRF */ 2557 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2558 PetscBLASInt lqr_work; 2559 /* working stuff for UNGQR */ 2560 PetscScalar *gqr_work,lgqr_work_t; 2561 PetscBLASInt lgqr_work; 2562 /* working stuff for TRTRS */ 2563 PetscScalar *trs_rhs; 2564 PetscBLASInt Blas_NRHS; 2565 /* pointers for values insertion into change of basis matrix */ 2566 PetscInt *start_rows,*start_cols; 2567 PetscScalar *start_vals; 2568 /* working stuff for values insertion */ 2569 PetscBT is_primal; 2570 PetscInt *aux_primal_numbering_B; 2571 /* matrix sizes */ 2572 PetscInt global_size,local_size; 2573 /* temporary change of basis */ 2574 Mat localChangeOfBasisMatrix; 2575 /* extra space for debugging */ 2576 PetscScalar *dbg_work; 2577 2578 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2579 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2580 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2581 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2582 /* nonzeros for local mat */ 2583 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2584 for (i=0;i<pcis->n;i++) nnz[i]=1; 2585 for (i=n_vertices;i<total_counts_cc;i++) { 2586 if (PetscBTLookup(change_basis,i)) { 2587 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2588 if (PetscBTLookup(qr_needed_idx,i)) { 2589 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2590 } else { 2591 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2592 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2593 } 2594 } 2595 } 2596 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2597 ierr = PetscFree(nnz);CHKERRQ(ierr); 2598 /* Set initial identity in the matrix */ 2599 for (i=0;i<pcis->n;i++) { 2600 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2601 } 2602 2603 if (pcbddc->dbg_flag) { 2604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2606 } 2607 2608 2609 /* Now we loop on the constraints which need a change of basis */ 2610 /* 2611 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2612 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2613 2614 Basic blocks of change of basis matrix T computed by 2615 2616 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2617 2618 | 1 0 ... 0 s_1/S | 2619 | 0 1 ... 0 s_2/S | 2620 | ... | 2621 | 0 ... 1 s_{n-1}/S | 2622 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2623 2624 with S = \sum_{i=1}^n s_i^2 2625 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2626 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2627 2628 - QR decomposition of constraints otherwise 2629 */ 2630 if (qr_needed) { 2631 /* space to store Q */ 2632 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2633 /* first we issue queries for optimal work */ 2634 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2635 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2636 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2637 lqr_work = -1; 2638 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2639 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2640 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2641 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2642 lgqr_work = -1; 2643 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2644 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2645 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2646 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2647 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2648 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2649 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2650 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2651 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2652 /* array to store scaling factors for reflectors */ 2653 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2654 /* array to store rhs and solution of triangular solver */ 2655 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2656 /* allocating workspace for check */ 2657 if (pcbddc->dbg_flag) { 2658 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2659 } 2660 } 2661 /* array to store whether a node is primal or not */ 2662 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2663 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2664 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2665 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 2666 for (i=0;i<total_primal_vertices;i++) { 2667 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2668 } 2669 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2670 2671 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2672 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2673 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2674 if (PetscBTLookup(change_basis,total_counts)) { 2675 /* get constraint info */ 2676 primal_dofs = constraints_n[total_counts]; 2677 dual_dofs = size_of_constraint-primal_dofs; 2678 2679 if (pcbddc->dbg_flag) { 2680 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2681 } 2682 2683 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2684 2685 /* copy quadrature constraints for change of basis check */ 2686 if (pcbddc->dbg_flag) { 2687 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2688 } 2689 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2690 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2691 2692 /* compute QR decomposition of constraints */ 2693 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2694 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2695 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2696 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2697 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2698 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2699 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2700 2701 /* explictly compute R^-T */ 2702 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2703 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2704 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2705 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2706 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2707 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2708 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2709 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2710 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2711 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2712 2713 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2714 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2715 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2716 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2717 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2718 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2719 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2720 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2721 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2722 2723 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2724 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2725 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2726 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2727 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2728 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2729 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2730 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2732 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2733 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2734 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2735 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2736 2737 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2738 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2739 /* insert cols for primal dofs */ 2740 for (j=0;j<primal_dofs;j++) { 2741 start_vals = &qr_basis[j*size_of_constraint]; 2742 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2743 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2744 } 2745 /* insert cols for dual dofs */ 2746 for (j=0,k=0;j<dual_dofs;k++) { 2747 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2748 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2749 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2750 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2751 j++; 2752 } 2753 } 2754 2755 /* check change of basis */ 2756 if (pcbddc->dbg_flag) { 2757 PetscInt ii,jj; 2758 PetscBool valid_qr=PETSC_TRUE; 2759 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2760 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2761 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2762 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2763 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2764 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2765 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2766 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2767 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2768 for (jj=0;jj<size_of_constraint;jj++) { 2769 for (ii=0;ii<primal_dofs;ii++) { 2770 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2771 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2772 } 2773 } 2774 if (!valid_qr) { 2775 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2776 for (jj=0;jj<size_of_constraint;jj++) { 2777 for (ii=0;ii<primal_dofs;ii++) { 2778 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2779 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2780 } 2781 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2782 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2783 } 2784 } 2785 } 2786 } else { 2787 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2788 } 2789 } 2790 } else { /* simple transformation block */ 2791 PetscInt row,col; 2792 PetscScalar val,norm; 2793 2794 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2795 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2796 for (j=0;j<size_of_constraint;j++) { 2797 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2798 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2799 if (!PetscBTLookup(is_primal,row_B)) { 2800 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2801 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2802 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2803 } else { 2804 for (k=0;k<size_of_constraint;k++) { 2805 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2806 if (row != col) { 2807 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2808 } else { 2809 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2810 } 2811 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2812 } 2813 } 2814 } 2815 if (pcbddc->dbg_flag) { 2816 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2817 } 2818 } 2819 } else { 2820 if (pcbddc->dbg_flag) { 2821 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2822 } 2823 } 2824 } 2825 2826 /* free workspace */ 2827 if (qr_needed) { 2828 if (pcbddc->dbg_flag) { 2829 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2830 } 2831 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2832 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2833 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2834 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2835 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2836 } 2837 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2838 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2839 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2840 2841 /* assembling of global change of variable */ 2842 { 2843 Mat tmat; 2844 PetscInt bs; 2845 2846 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2847 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2848 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2849 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2850 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2851 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2852 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2853 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2854 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2855 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2856 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2857 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2858 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2859 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2860 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2861 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2862 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2863 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2864 } 2865 /* check */ 2866 if (pcbddc->dbg_flag) { 2867 PetscReal error; 2868 Vec x,x_change; 2869 2870 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2871 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2872 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2873 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2874 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2875 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2876 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2877 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2878 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2879 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2880 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2881 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2882 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2883 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2884 ierr = VecDestroy(&x);CHKERRQ(ierr); 2885 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2886 } 2887 2888 /* adapt sub_schurs computed (if any) */ 2889 if (pcbddc->use_deluxe_scaling) { 2890 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2891 if (sub_schurs->S_Ej_all) { 2892 Mat S_new,tmat; 2893 IS is_all_N; 2894 2895 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2896 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2897 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2898 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2899 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2900 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2901 sub_schurs->S_Ej_all = S_new; 2902 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2903 if (sub_schurs->sum_S_Ej_all) { 2904 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2905 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2906 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2907 sub_schurs->sum_S_Ej_all = S_new; 2908 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2909 } 2910 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2911 } 2912 } 2913 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2914 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2915 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2916 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2917 } 2918 2919 /* set up change of basis context */ 2920 if (pcbddc->ChangeOfBasisMatrix) { 2921 PCBDDCChange_ctx change_ctx; 2922 2923 if (!pcbddc->new_global_mat) { 2924 PetscInt global_size,local_size; 2925 2926 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2927 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2928 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2929 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2930 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2931 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2932 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2933 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2934 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2935 } else { 2936 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2937 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2938 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2939 } 2940 if (!pcbddc->user_ChangeOfBasisMatrix) { 2941 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2942 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2943 } else { 2944 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2945 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2946 } 2947 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2948 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2949 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2950 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2951 } 2952 2953 /* check if a new primal space has been introduced */ 2954 pcbddc->new_primal_space_local = PETSC_TRUE; 2955 if (olocal_primal_size == pcbddc->local_primal_size) { 2956 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2957 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2958 if (!pcbddc->new_primal_space_local) { 2959 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2960 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2961 } 2962 } 2963 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2964 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2965 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2966 2967 /* flush dbg viewer */ 2968 if (pcbddc->dbg_flag) { 2969 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2970 } 2971 2972 /* free workspace */ 2973 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2974 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2975 if (!pcbddc->adaptive_selection) { 2976 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2977 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2978 } else { 2979 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2980 pcbddc->adaptive_constraints_idxs_ptr, 2981 pcbddc->adaptive_constraints_data_ptr, 2982 pcbddc->adaptive_constraints_idxs, 2983 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2984 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2985 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2986 } 2987 PetscFunctionReturn(0); 2988 } 2989 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2992 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2993 { 2994 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2995 PC_IS *pcis = (PC_IS*)pc->data; 2996 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2997 PetscInt ierr,i,vertex_size,N; 2998 PetscViewer viewer=pcbddc->dbg_viewer; 2999 3000 PetscFunctionBegin; 3001 /* Reset previously computed graph */ 3002 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3003 /* Init local Graph struct */ 3004 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3005 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3006 3007 /* Check validity of the csr graph passed in by the user */ 3008 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3009 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3010 } 3011 3012 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3013 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3014 PetscInt *xadj,*adjncy; 3015 PetscInt nvtxs; 3016 PetscBool flg_row=PETSC_FALSE; 3017 3018 if (pcbddc->use_local_adj) { 3019 3020 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3021 if (flg_row) { 3022 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3023 pcbddc->computed_rowadj = PETSC_TRUE; 3024 } 3025 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3026 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3027 IS is_dummy; 3028 ISLocalToGlobalMapping l2gmap_dummy; 3029 PetscInt j,sum; 3030 PetscInt *cxadj,*cadjncy; 3031 const PetscInt *idxs; 3032 PCBDDCGraph graph; 3033 PetscBT is_on_boundary; 3034 3035 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3036 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3037 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3038 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3039 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3040 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3041 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3042 if (flg_row) { 3043 graph->xadj = xadj; 3044 graph->adjncy = adjncy; 3045 } 3046 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3047 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3048 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3049 3050 if (pcbddc->dbg_flag) { 3051 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3052 for (i=0;i<graph->ncc;i++) { 3053 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3054 } 3055 } 3056 3057 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3058 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3059 for (i=0;i<pcis->n_B;i++) { 3060 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3061 } 3062 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3063 3064 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3065 sum = 0; 3066 for (i=0;i<graph->ncc;i++) { 3067 PetscInt sizecc = 0; 3068 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3069 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3070 sizecc++; 3071 } 3072 } 3073 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3074 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3075 cxadj[graph->queue[j]] = sizecc; 3076 } 3077 } 3078 sum += sizecc*sizecc; 3079 } 3080 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3081 sum = 0; 3082 for (i=0;i<pcis->n;i++) { 3083 PetscInt temp = cxadj[i]; 3084 cxadj[i] = sum; 3085 sum += temp; 3086 } 3087 cxadj[pcis->n] = sum; 3088 for (i=0;i<graph->ncc;i++) { 3089 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3090 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3091 PetscInt k,sizecc = 0; 3092 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3093 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3094 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3095 sizecc++; 3096 } 3097 } 3098 } 3099 } 3100 } 3101 if (sum) { 3102 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3103 } else { 3104 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3105 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3106 } 3107 graph->xadj = 0; 3108 graph->adjncy = 0; 3109 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3110 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3111 } 3112 } 3113 if (pcbddc->dbg_flag) { 3114 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3115 } 3116 3117 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3118 vertex_size = 1; 3119 if (pcbddc->user_provided_isfordofs) { 3120 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3121 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3122 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3123 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3124 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3125 } 3126 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3127 pcbddc->n_ISForDofs = 0; 3128 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3129 } 3130 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3131 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3132 } else { 3133 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3134 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3135 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3136 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3137 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3138 } 3139 } 3140 } 3141 3142 /* Setup of Graph */ 3143 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3144 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3145 } 3146 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3147 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3148 } 3149 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3150 3151 /* Graph's connected components analysis */ 3152 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3153 3154 /* print some info to stdout */ 3155 if (pcbddc->dbg_flag) { 3156 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3157 } 3158 3159 /* mark topography has done */ 3160 pcbddc->recompute_topography = PETSC_FALSE; 3161 PetscFunctionReturn(0); 3162 } 3163 3164 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3165 #undef __FUNCT__ 3166 #define __FUNCT__ "PCBDDCSubsetNumbering" 3167 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3168 { 3169 PetscSF sf; 3170 PetscLayout map; 3171 const PetscInt *idxs; 3172 PetscInt *leaf_data,*root_data,*gidxs; 3173 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3174 PetscInt n_n,nlocals,start,first_index; 3175 PetscMPIInt commsize; 3176 PetscBool first_found; 3177 PetscErrorCode ierr; 3178 3179 PetscFunctionBegin; 3180 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3181 if (subset_mult) { 3182 PetscCheckSameComm(subset,1,subset_mult,2); 3183 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3184 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3185 } 3186 /* create workspace layout for computing global indices of subset */ 3187 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3188 lbounds[0] = lbounds[1] = 0; 3189 for (i=0;i<n;i++) { 3190 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3191 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3192 } 3193 lbounds[0] = -lbounds[0]; 3194 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3195 gbounds[0] = -gbounds[0]; 3196 N = gbounds[1] - gbounds[0] + 1; 3197 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3198 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3199 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3200 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3201 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3202 3203 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3204 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3205 if (subset_mult) { 3206 const PetscInt* idxs_mult; 3207 3208 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3209 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3210 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3211 } else { 3212 for (i=0;i<n;i++) leaf_data[i] = 1; 3213 } 3214 /* local size of new subset */ 3215 n_n = 0; 3216 for (i=0;i<n;i++) n_n += leaf_data[i]; 3217 3218 /* global indexes in layout */ 3219 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3220 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3221 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3222 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3223 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3224 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3225 3226 /* reduce from leaves to roots */ 3227 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3228 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3229 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3230 3231 /* count indexes in local part of layout */ 3232 nlocals = 0; 3233 first_index = -1; 3234 first_found = PETSC_FALSE; 3235 for (i=0;i<Nl;i++) { 3236 if (!first_found && root_data[i]) { 3237 first_found = PETSC_TRUE; 3238 first_index = i; 3239 } 3240 nlocals += root_data[i]; 3241 } 3242 3243 /* cumulative of number of indexes and size of subset without holes */ 3244 #if defined(PETSC_HAVE_MPI_EXSCAN) 3245 start = 0; 3246 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3247 #else 3248 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3249 start = start-nlocals; 3250 #endif 3251 3252 if (N_n) { /* compute total size of new subset if requested */ 3253 *N_n = start + nlocals; 3254 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3255 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3256 } 3257 3258 /* adapt root data with cumulative */ 3259 if (first_found) { 3260 PetscInt old_index; 3261 3262 root_data[first_index] += start; 3263 old_index = first_index; 3264 for (i=first_index+1;i<Nl;i++) { 3265 if (root_data[i]) { 3266 root_data[i] += root_data[old_index]; 3267 old_index = i; 3268 } 3269 } 3270 } 3271 3272 /* from roots to leaves */ 3273 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3274 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3275 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3276 3277 /* create new IS with global indexes without holes */ 3278 if (subset_mult) { 3279 const PetscInt* idxs_mult; 3280 PetscInt cum; 3281 3282 cum = 0; 3283 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3284 for (i=0;i<n;i++) { 3285 PetscInt j; 3286 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3287 } 3288 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3289 } else { 3290 for (i=0;i<n;i++) { 3291 gidxs[i] = leaf_data[i]-1; 3292 } 3293 } 3294 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3295 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3296 PetscFunctionReturn(0); 3297 } 3298 3299 #undef __FUNCT__ 3300 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3301 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3302 { 3303 PetscInt i,j; 3304 PetscScalar *alphas; 3305 PetscErrorCode ierr; 3306 3307 PetscFunctionBegin; 3308 /* this implements stabilized Gram-Schmidt */ 3309 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3310 for (i=0;i<n;i++) { 3311 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3312 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3313 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3314 } 3315 ierr = PetscFree(alphas);CHKERRQ(ierr); 3316 PetscFunctionReturn(0); 3317 } 3318 3319 #undef __FUNCT__ 3320 #define __FUNCT__ "MatISGetSubassemblingPattern" 3321 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3322 { 3323 IS ranks_send_to; 3324 PetscInt n_neighs,*neighs,*n_shared,**shared; 3325 PetscMPIInt size,rank,color; 3326 PetscInt *xadj,*adjncy; 3327 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3328 PetscInt i,local_size,threshold=0; 3329 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3330 PetscSubcomm subcomm; 3331 PetscErrorCode ierr; 3332 3333 PetscFunctionBegin; 3334 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3335 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3336 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3337 3338 /* Get info on mapping */ 3339 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3340 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3341 3342 /* build local CSR graph of subdomains' connectivity */ 3343 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3344 xadj[0] = 0; 3345 xadj[1] = PetscMax(n_neighs-1,0); 3346 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3347 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3348 3349 if (threshold) { 3350 PetscInt xadj_count = 0; 3351 for (i=1;i<n_neighs;i++) { 3352 if (n_shared[i] > threshold) { 3353 adjncy[xadj_count] = neighs[i]; 3354 adjncy_wgt[xadj_count] = n_shared[i]; 3355 xadj_count++; 3356 } 3357 } 3358 xadj[1] = xadj_count; 3359 } else { 3360 if (xadj[1]) { 3361 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3362 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3363 } 3364 } 3365 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3366 if (use_square) { 3367 for (i=0;i<xadj[1];i++) { 3368 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3369 } 3370 } 3371 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3372 3373 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3374 3375 /* 3376 Restrict work on active processes only. 3377 */ 3378 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3379 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3380 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3381 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3382 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3383 if (color) { 3384 ierr = PetscFree(xadj);CHKERRQ(ierr); 3385 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3386 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3387 } else { 3388 Mat subdomain_adj; 3389 IS new_ranks,new_ranks_contig; 3390 MatPartitioning partitioner; 3391 PetscInt prank,rstart=0,rend=0; 3392 PetscInt *is_indices,*oldranks; 3393 PetscBool aggregate; 3394 3395 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3396 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3397 prank = rank; 3398 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3399 /* 3400 for (i=0;i<size;i++) { 3401 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3402 } 3403 */ 3404 for (i=0;i<xadj[1];i++) { 3405 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3406 } 3407 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3408 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3409 if (aggregate) { 3410 PetscInt lrows,row,ncols,*cols; 3411 PetscMPIInt nrank; 3412 PetscScalar *vals; 3413 3414 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3415 lrows = 0; 3416 if (nrank<redprocs) { 3417 lrows = size/redprocs; 3418 if (nrank<size%redprocs) lrows++; 3419 } 3420 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3421 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3422 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3423 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3424 row = nrank; 3425 ncols = xadj[1]-xadj[0]; 3426 cols = adjncy; 3427 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3428 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3429 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3430 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3431 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3432 ierr = PetscFree(xadj);CHKERRQ(ierr); 3433 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3434 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3435 ierr = PetscFree(vals);CHKERRQ(ierr); 3436 } else { 3437 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3438 } 3439 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3440 3441 /* Partition */ 3442 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3443 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3444 if (use_vwgt) { 3445 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3446 v_wgt[0] = local_size; 3447 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3448 } 3449 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3450 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3451 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3452 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3453 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3454 3455 /* renumber new_ranks to avoid "holes" in new set of processors */ 3456 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3457 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3458 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3459 if (!redprocs) { 3460 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3461 } else { 3462 PetscInt idxs[1]; 3463 PetscMPIInt tag; 3464 MPI_Request *reqs; 3465 3466 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3467 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3468 for (i=rstart;i<rend;i++) { 3469 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3470 } 3471 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3472 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3473 ierr = PetscFree(reqs);CHKERRQ(ierr); 3474 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3475 } 3476 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3477 /* clean up */ 3478 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3479 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3480 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3481 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3482 } 3483 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3484 3485 /* assemble parallel IS for sends */ 3486 i = 1; 3487 if (color) i=0; 3488 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3489 /* get back IS */ 3490 *is_sends = ranks_send_to; 3491 PetscFunctionReturn(0); 3492 } 3493 3494 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3495 3496 #undef __FUNCT__ 3497 #define __FUNCT__ "MatISSubassemble" 3498 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3499 { 3500 Mat local_mat; 3501 IS is_sends_internal; 3502 PetscInt rows,cols,new_local_rows; 3503 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3504 PetscBool ismatis,isdense,newisdense,destroy_mat; 3505 ISLocalToGlobalMapping l2gmap; 3506 PetscInt* l2gmap_indices; 3507 const PetscInt* is_indices; 3508 MatType new_local_type; 3509 /* buffers */ 3510 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3511 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3512 PetscInt *recv_buffer_idxs_local; 3513 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3514 /* MPI */ 3515 MPI_Comm comm,comm_n; 3516 PetscSubcomm subcomm; 3517 PetscMPIInt n_sends,n_recvs,commsize; 3518 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3519 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3520 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3521 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3522 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3523 PetscErrorCode ierr; 3524 3525 PetscFunctionBegin; 3526 /* TODO: add missing checks */ 3527 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3528 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3529 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3530 PetscValidLogicalCollectiveInt(mat,nis,7); 3531 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3532 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3533 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3534 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3535 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3536 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3537 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3538 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3539 PetscInt mrows,mcols,mnrows,mncols; 3540 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3541 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3542 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3543 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3544 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3545 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3546 } 3547 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3548 PetscValidLogicalCollectiveInt(mat,bs,0); 3549 /* prepare IS for sending if not provided */ 3550 if (!is_sends) { 3551 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3552 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3553 } else { 3554 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3555 is_sends_internal = is_sends; 3556 } 3557 3558 /* get comm */ 3559 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3560 3561 /* compute number of sends */ 3562 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3563 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3564 3565 /* compute number of receives */ 3566 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3567 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3568 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3569 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3570 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3571 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3572 ierr = PetscFree(iflags);CHKERRQ(ierr); 3573 3574 /* restrict comm if requested */ 3575 subcomm = 0; 3576 destroy_mat = PETSC_FALSE; 3577 if (restrict_comm) { 3578 PetscMPIInt color,subcommsize; 3579 3580 color = 0; 3581 if (restrict_full) { 3582 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3583 } else { 3584 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3585 } 3586 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3587 subcommsize = commsize - subcommsize; 3588 /* check if reuse has been requested */ 3589 if (reuse == MAT_REUSE_MATRIX) { 3590 if (*mat_n) { 3591 PetscMPIInt subcommsize2; 3592 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3593 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3594 comm_n = PetscObjectComm((PetscObject)*mat_n); 3595 } else { 3596 comm_n = PETSC_COMM_SELF; 3597 } 3598 } else { /* MAT_INITIAL_MATRIX */ 3599 PetscMPIInt rank; 3600 3601 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3602 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3603 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3604 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3605 comm_n = PetscSubcommChild(subcomm); 3606 } 3607 /* flag to destroy *mat_n if not significative */ 3608 if (color) destroy_mat = PETSC_TRUE; 3609 } else { 3610 comm_n = comm; 3611 } 3612 3613 /* prepare send/receive buffers */ 3614 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3615 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3616 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3617 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3618 if (nis) { 3619 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3620 } 3621 3622 /* Get data from local matrices */ 3623 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3624 /* TODO: See below some guidelines on how to prepare the local buffers */ 3625 /* 3626 send_buffer_vals should contain the raw values of the local matrix 3627 send_buffer_idxs should contain: 3628 - MatType_PRIVATE type 3629 - PetscInt size_of_l2gmap 3630 - PetscInt global_row_indices[size_of_l2gmap] 3631 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3632 */ 3633 else { 3634 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3635 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3636 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3637 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3638 send_buffer_idxs[1] = i; 3639 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3640 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3641 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3642 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3643 for (i=0;i<n_sends;i++) { 3644 ilengths_vals[is_indices[i]] = len*len; 3645 ilengths_idxs[is_indices[i]] = len+2; 3646 } 3647 } 3648 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3649 /* additional is (if any) */ 3650 if (nis) { 3651 PetscMPIInt psum; 3652 PetscInt j; 3653 for (j=0,psum=0;j<nis;j++) { 3654 PetscInt plen; 3655 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3656 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3657 psum += len+1; /* indices + lenght */ 3658 } 3659 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3660 for (j=0,psum=0;j<nis;j++) { 3661 PetscInt plen; 3662 const PetscInt *is_array_idxs; 3663 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3664 send_buffer_idxs_is[psum] = plen; 3665 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3666 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3667 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3668 psum += plen+1; /* indices + lenght */ 3669 } 3670 for (i=0;i<n_sends;i++) { 3671 ilengths_idxs_is[is_indices[i]] = psum; 3672 } 3673 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3674 } 3675 3676 buf_size_idxs = 0; 3677 buf_size_vals = 0; 3678 buf_size_idxs_is = 0; 3679 for (i=0;i<n_recvs;i++) { 3680 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3681 buf_size_vals += (PetscInt)olengths_vals[i]; 3682 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3683 } 3684 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3685 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3686 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3687 3688 /* get new tags for clean communications */ 3689 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3690 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3691 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3692 3693 /* allocate for requests */ 3694 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3695 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3696 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3697 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3698 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3699 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3700 3701 /* communications */ 3702 ptr_idxs = recv_buffer_idxs; 3703 ptr_vals = recv_buffer_vals; 3704 ptr_idxs_is = recv_buffer_idxs_is; 3705 for (i=0;i<n_recvs;i++) { 3706 source_dest = onodes[i]; 3707 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3708 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3709 ptr_idxs += olengths_idxs[i]; 3710 ptr_vals += olengths_vals[i]; 3711 if (nis) { 3712 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3713 ptr_idxs_is += olengths_idxs_is[i]; 3714 } 3715 } 3716 for (i=0;i<n_sends;i++) { 3717 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3718 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3719 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3720 if (nis) { 3721 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3722 } 3723 } 3724 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3725 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3726 3727 /* assemble new l2g map */ 3728 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3729 ptr_idxs = recv_buffer_idxs; 3730 new_local_rows = 0; 3731 for (i=0;i<n_recvs;i++) { 3732 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3733 ptr_idxs += olengths_idxs[i]; 3734 } 3735 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3736 ptr_idxs = recv_buffer_idxs; 3737 new_local_rows = 0; 3738 for (i=0;i<n_recvs;i++) { 3739 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3740 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3741 ptr_idxs += olengths_idxs[i]; 3742 } 3743 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3744 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3745 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3746 3747 /* infer new local matrix type from received local matrices type */ 3748 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3749 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3750 if (n_recvs) { 3751 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3752 ptr_idxs = recv_buffer_idxs; 3753 for (i=0;i<n_recvs;i++) { 3754 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3755 new_local_type_private = MATAIJ_PRIVATE; 3756 break; 3757 } 3758 ptr_idxs += olengths_idxs[i]; 3759 } 3760 switch (new_local_type_private) { 3761 case MATDENSE_PRIVATE: 3762 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3763 new_local_type = MATSEQAIJ; 3764 bs = 1; 3765 } else { /* if I receive only 1 dense matrix */ 3766 new_local_type = MATSEQDENSE; 3767 bs = 1; 3768 } 3769 break; 3770 case MATAIJ_PRIVATE: 3771 new_local_type = MATSEQAIJ; 3772 bs = 1; 3773 break; 3774 case MATBAIJ_PRIVATE: 3775 new_local_type = MATSEQBAIJ; 3776 break; 3777 case MATSBAIJ_PRIVATE: 3778 new_local_type = MATSEQSBAIJ; 3779 break; 3780 default: 3781 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3782 break; 3783 } 3784 } else { /* by default, new_local_type is seqdense */ 3785 new_local_type = MATSEQDENSE; 3786 bs = 1; 3787 } 3788 3789 /* create MATIS object if needed */ 3790 if (reuse == MAT_INITIAL_MATRIX) { 3791 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3792 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3793 } else { 3794 /* it also destroys the local matrices */ 3795 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3796 } 3797 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3798 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3799 3800 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3801 3802 /* Global to local map of received indices */ 3803 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3804 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3805 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3806 3807 /* restore attributes -> type of incoming data and its size */ 3808 buf_size_idxs = 0; 3809 for (i=0;i<n_recvs;i++) { 3810 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3811 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3812 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3813 } 3814 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3815 3816 /* set preallocation */ 3817 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3818 if (!newisdense) { 3819 PetscInt *new_local_nnz=0; 3820 3821 ptr_vals = recv_buffer_vals; 3822 ptr_idxs = recv_buffer_idxs_local; 3823 if (n_recvs) { 3824 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3825 } 3826 for (i=0;i<n_recvs;i++) { 3827 PetscInt j; 3828 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3829 for (j=0;j<*(ptr_idxs+1);j++) { 3830 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3831 } 3832 } else { 3833 /* TODO */ 3834 } 3835 ptr_idxs += olengths_idxs[i]; 3836 } 3837 if (new_local_nnz) { 3838 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3839 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3840 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3841 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3842 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3843 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3844 } else { 3845 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3846 } 3847 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3848 } else { 3849 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3850 } 3851 3852 /* set values */ 3853 ptr_vals = recv_buffer_vals; 3854 ptr_idxs = recv_buffer_idxs_local; 3855 for (i=0;i<n_recvs;i++) { 3856 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3857 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3858 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3859 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3860 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3861 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3862 } else { 3863 /* TODO */ 3864 } 3865 ptr_idxs += olengths_idxs[i]; 3866 ptr_vals += olengths_vals[i]; 3867 } 3868 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3869 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3870 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3871 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3872 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3873 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3874 3875 #if 0 3876 if (!restrict_comm) { /* check */ 3877 Vec lvec,rvec; 3878 PetscReal infty_error; 3879 3880 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3881 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3882 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3883 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3884 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3885 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3886 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3887 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3888 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3889 } 3890 #endif 3891 3892 /* assemble new additional is (if any) */ 3893 if (nis) { 3894 PetscInt **temp_idxs,*count_is,j,psum; 3895 3896 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3897 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3898 ptr_idxs = recv_buffer_idxs_is; 3899 psum = 0; 3900 for (i=0;i<n_recvs;i++) { 3901 for (j=0;j<nis;j++) { 3902 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3903 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3904 psum += plen; 3905 ptr_idxs += plen+1; /* shift pointer to received data */ 3906 } 3907 } 3908 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3909 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3910 for (i=1;i<nis;i++) { 3911 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3912 } 3913 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3914 ptr_idxs = recv_buffer_idxs_is; 3915 for (i=0;i<n_recvs;i++) { 3916 for (j=0;j<nis;j++) { 3917 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3918 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3919 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3920 ptr_idxs += plen+1; /* shift pointer to received data */ 3921 } 3922 } 3923 for (i=0;i<nis;i++) { 3924 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3925 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3926 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3927 } 3928 ierr = PetscFree(count_is);CHKERRQ(ierr); 3929 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3930 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3931 } 3932 /* free workspace */ 3933 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3934 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3935 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3936 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3937 if (isdense) { 3938 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3939 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3940 } else { 3941 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3942 } 3943 if (nis) { 3944 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3945 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3946 } 3947 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3948 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3949 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3950 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3951 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3952 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3953 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3954 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3955 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3956 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3957 ierr = PetscFree(onodes);CHKERRQ(ierr); 3958 if (nis) { 3959 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3960 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3961 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3962 } 3963 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3964 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3965 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3966 for (i=0;i<nis;i++) { 3967 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3968 } 3969 *mat_n = NULL; 3970 } 3971 PetscFunctionReturn(0); 3972 } 3973 3974 /* temporary hack into ksp private data structure */ 3975 #include <petsc/private/kspimpl.h> 3976 3977 #undef __FUNCT__ 3978 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3979 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3980 { 3981 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3982 PC_IS *pcis = (PC_IS*)pc->data; 3983 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3984 MatNullSpace CoarseNullSpace=NULL; 3985 ISLocalToGlobalMapping coarse_islg; 3986 IS coarse_is,*isarray; 3987 PetscInt i,im_active=-1,active_procs=-1; 3988 PetscInt nis,nisdofs,nisneu; 3989 PC pc_temp; 3990 PCType coarse_pc_type; 3991 KSPType coarse_ksp_type; 3992 PetscBool multilevel_requested,multilevel_allowed; 3993 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3994 Mat t_coarse_mat_is; 3995 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3996 PetscMPIInt all_procs; 3997 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3998 PetscBool compute_vecs = PETSC_FALSE; 3999 PetscScalar *array; 4000 PetscErrorCode ierr; 4001 4002 PetscFunctionBegin; 4003 /* Assign global numbering to coarse dofs */ 4004 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4005 PetscInt ocoarse_size; 4006 compute_vecs = PETSC_TRUE; 4007 ocoarse_size = pcbddc->coarse_size; 4008 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4009 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4010 /* see if we can avoid some work */ 4011 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4012 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4013 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4014 PC pc; 4015 PetscBool isbddc; 4016 4017 /* temporary workaround since PCBDDC does not have a reset method so far */ 4018 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4019 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4020 if (isbddc) { 4021 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4022 } 4023 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4024 coarse_reuse = PETSC_FALSE; 4025 } else { /* we can safely reuse already computed coarse matrix */ 4026 coarse_reuse = PETSC_TRUE; 4027 } 4028 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4029 coarse_reuse = PETSC_FALSE; 4030 } 4031 /* reset any subassembling information */ 4032 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4033 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4034 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4035 coarse_reuse = PETSC_TRUE; 4036 } 4037 4038 /* count "active" (i.e. with positive local size) and "void" processes */ 4039 im_active = !!(pcis->n); 4040 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4041 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4042 void_procs = all_procs-active_procs; 4043 csin_type_simple = PETSC_TRUE; 4044 redist = PETSC_FALSE; 4045 if (pcbddc->current_level && void_procs) { 4046 csin_ml = PETSC_TRUE; 4047 ncoarse_ml = void_procs; 4048 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4049 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4050 csin_ds = PETSC_TRUE; 4051 ncoarse_ds = pcbddc->redistribute_coarse; 4052 redist = PETSC_TRUE; 4053 } else { 4054 csin_ds = PETSC_TRUE; 4055 ncoarse_ds = active_procs; 4056 redist = PETSC_TRUE; 4057 } 4058 } else { 4059 csin_ml = PETSC_FALSE; 4060 ncoarse_ml = all_procs; 4061 if (void_procs) { 4062 csin_ds = PETSC_TRUE; 4063 ncoarse_ds = void_procs; 4064 csin_type_simple = PETSC_FALSE; 4065 } else { 4066 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4067 csin_ds = PETSC_TRUE; 4068 ncoarse_ds = pcbddc->redistribute_coarse; 4069 redist = PETSC_TRUE; 4070 } else { 4071 csin_ds = PETSC_FALSE; 4072 ncoarse_ds = all_procs; 4073 } 4074 } 4075 } 4076 4077 /* 4078 test if we can go multilevel: three conditions must be satisfied: 4079 - we have not exceeded the number of levels requested 4080 - we can actually subassemble the active processes 4081 - we can find a suitable number of MPI processes where we can place the subassembled problem 4082 */ 4083 multilevel_allowed = PETSC_FALSE; 4084 multilevel_requested = PETSC_FALSE; 4085 if (pcbddc->current_level < pcbddc->max_levels) { 4086 multilevel_requested = PETSC_TRUE; 4087 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4088 multilevel_allowed = PETSC_FALSE; 4089 } else { 4090 multilevel_allowed = PETSC_TRUE; 4091 } 4092 } 4093 /* determine number of process partecipating to coarse solver */ 4094 if (multilevel_allowed) { 4095 ncoarse = ncoarse_ml; 4096 csin = csin_ml; 4097 redist = PETSC_FALSE; 4098 } else { 4099 ncoarse = ncoarse_ds; 4100 csin = csin_ds; 4101 } 4102 4103 /* creates temporary l2gmap and IS for coarse indexes */ 4104 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4105 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4106 4107 /* creates temporary MATIS object for coarse matrix */ 4108 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4109 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4110 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4111 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4112 #if 0 4113 { 4114 PetscViewer viewer; 4115 char filename[256]; 4116 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4117 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4118 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4119 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4120 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4121 } 4122 #endif 4123 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4124 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4125 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4126 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4127 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4128 4129 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4130 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4131 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4132 const PetscInt *idxs; 4133 ISLocalToGlobalMapping tmap; 4134 4135 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4136 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4137 /* allocate space for temporary storage */ 4138 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4139 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4140 /* allocate for IS array */ 4141 nisdofs = pcbddc->n_ISForDofsLocal; 4142 nisneu = !!pcbddc->NeumannBoundariesLocal; 4143 nis = nisdofs + nisneu; 4144 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4145 /* dofs splitting */ 4146 for (i=0;i<nisdofs;i++) { 4147 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4148 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4149 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4150 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4151 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4152 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4153 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4154 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4155 } 4156 /* neumann boundaries */ 4157 if (pcbddc->NeumannBoundariesLocal) { 4158 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4159 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4160 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4161 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4162 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4163 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4164 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4165 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4166 } 4167 /* free memory */ 4168 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4169 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4170 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4171 } else { 4172 nis = 0; 4173 nisdofs = 0; 4174 nisneu = 0; 4175 isarray = NULL; 4176 } 4177 /* destroy no longer needed map */ 4178 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4179 4180 /* restrict on coarse candidates (if needed) */ 4181 coarse_mat_is = NULL; 4182 if (csin) { 4183 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4184 if (redist) { 4185 PetscMPIInt rank; 4186 PetscInt spc,n_spc_p1,dest[1],destsize; 4187 4188 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4189 spc = active_procs/ncoarse; 4190 n_spc_p1 = active_procs%ncoarse; 4191 if (im_active) { 4192 destsize = 1; 4193 if (rank > n_spc_p1*(spc+1)-1) { 4194 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4195 } else { 4196 dest[0] = rank/(spc+1); 4197 } 4198 } else { 4199 destsize = 0; 4200 } 4201 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4202 } else if (csin_type_simple) { 4203 PetscMPIInt rank; 4204 PetscInt issize,isidx; 4205 4206 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4207 if (im_active) { 4208 issize = 1; 4209 isidx = (PetscInt)rank; 4210 } else { 4211 issize = 0; 4212 isidx = -1; 4213 } 4214 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4215 } else { /* get a suitable subassembling pattern from MATIS code */ 4216 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4217 } 4218 4219 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4220 if (!redist || ncoarse <= void_procs) { 4221 PetscInt ncoarse_cand,tissize,*nisindices; 4222 PetscInt *coarse_candidates; 4223 const PetscInt* tisindices; 4224 4225 /* get coarse candidates' ranks in pc communicator */ 4226 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4227 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4228 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4229 if (!coarse_candidates[i]) { 4230 coarse_candidates[ncoarse_cand++]=i; 4231 } 4232 } 4233 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4234 4235 4236 if (pcbddc->dbg_flag) { 4237 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4238 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4239 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4240 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4241 for (i=0;i<ncoarse_cand;i++) { 4242 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4243 } 4244 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4245 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4246 } 4247 /* shift the pattern on coarse candidates */ 4248 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4249 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4250 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4251 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4252 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4253 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4254 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4255 } 4256 if (pcbddc->dbg_flag) { 4257 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4258 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4259 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4260 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4261 } 4262 } 4263 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4264 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4265 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4266 } else { /* this is the last level, so use just receiving processes in subcomm */ 4267 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4268 } 4269 } else { 4270 if (pcbddc->dbg_flag) { 4271 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4272 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4273 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4274 } 4275 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4276 coarse_mat_is = t_coarse_mat_is; 4277 } 4278 4279 /* create local to global scatters for coarse problem */ 4280 if (compute_vecs) { 4281 PetscInt lrows; 4282 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4283 if (coarse_mat_is) { 4284 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4285 } else { 4286 lrows = 0; 4287 } 4288 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4289 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4290 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4291 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4292 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4293 } 4294 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4295 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4296 4297 /* set defaults for coarse KSP and PC */ 4298 if (multilevel_allowed) { 4299 coarse_ksp_type = KSPRICHARDSON; 4300 coarse_pc_type = PCBDDC; 4301 } else { 4302 coarse_ksp_type = KSPPREONLY; 4303 coarse_pc_type = PCREDUNDANT; 4304 } 4305 4306 /* print some info if requested */ 4307 if (pcbddc->dbg_flag) { 4308 if (!multilevel_allowed) { 4309 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4310 if (multilevel_requested) { 4311 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4312 } else if (pcbddc->max_levels) { 4313 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4314 } 4315 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4316 } 4317 } 4318 4319 /* create the coarse KSP object only once with defaults */ 4320 if (coarse_mat_is) { 4321 MatReuse coarse_mat_reuse; 4322 PetscViewer dbg_viewer = NULL; 4323 if (pcbddc->dbg_flag) { 4324 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4325 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4326 } 4327 if (!pcbddc->coarse_ksp) { 4328 char prefix[256],str_level[16]; 4329 size_t len; 4330 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4331 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4332 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4333 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4334 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4335 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4336 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4337 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4338 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4339 /* prefix */ 4340 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4341 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4342 if (!pcbddc->current_level) { 4343 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4344 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4345 } else { 4346 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4347 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4348 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4349 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4350 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4351 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4352 } 4353 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4354 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4355 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4356 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4357 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4358 /* allow user customization */ 4359 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4360 } 4361 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4362 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4363 if (nisdofs) { 4364 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4365 for (i=0;i<nisdofs;i++) { 4366 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4367 } 4368 } 4369 if (nisneu) { 4370 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4371 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4372 } 4373 4374 /* get some info after set from options */ 4375 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4376 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4377 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4378 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4379 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4380 isbddc = PETSC_FALSE; 4381 } 4382 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4383 if (isredundant) { 4384 KSP inner_ksp; 4385 PC inner_pc; 4386 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4387 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4388 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4389 } 4390 4391 /* assemble coarse matrix */ 4392 if (coarse_reuse) { 4393 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4394 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4395 coarse_mat_reuse = MAT_REUSE_MATRIX; 4396 } else { 4397 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4398 } 4399 if (isbddc || isnn) { 4400 if (pcbddc->coarsening_ratio > 1) { 4401 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4402 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4403 if (pcbddc->dbg_flag) { 4404 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4405 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4406 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4407 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4408 } 4409 } 4410 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4411 } else { 4412 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4413 coarse_mat = coarse_mat_is; 4414 } 4415 } else { 4416 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4417 } 4418 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4419 4420 /* propagate symmetry info of coarse matrix */ 4421 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4422 if (pc->pmat->symmetric_set) { 4423 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4424 } 4425 if (pc->pmat->hermitian_set) { 4426 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4427 } 4428 if (pc->pmat->spd_set) { 4429 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4430 } 4431 /* set operators */ 4432 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4433 if (pcbddc->dbg_flag) { 4434 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4435 } 4436 } else { /* processes non partecipating to coarse solver (if any) */ 4437 coarse_mat = 0; 4438 } 4439 ierr = PetscFree(isarray);CHKERRQ(ierr); 4440 #if 0 4441 { 4442 PetscViewer viewer; 4443 char filename[256]; 4444 sprintf(filename,"coarse_mat.m"); 4445 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4446 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4447 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4448 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4449 } 4450 #endif 4451 4452 /* Compute coarse null space (special handling by BDDC only) */ 4453 #if 0 4454 if (pcbddc->NullSpace) { 4455 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4456 } 4457 #endif 4458 4459 if (pcbddc->coarse_ksp) { 4460 Vec crhs,csol; 4461 PetscBool ispreonly; 4462 4463 if (CoarseNullSpace) { 4464 if (isbddc) { 4465 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4466 } else { 4467 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4468 } 4469 } 4470 /* setup coarse ksp */ 4471 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4472 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4473 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4474 /* hack */ 4475 if (!csol) { 4476 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4477 } 4478 if (!crhs) { 4479 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4480 } 4481 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4482 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4483 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4484 KSP check_ksp; 4485 KSPType check_ksp_type; 4486 PC check_pc; 4487 Vec check_vec,coarse_vec; 4488 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4489 PetscInt its; 4490 PetscBool compute_eigs; 4491 PetscReal *eigs_r,*eigs_c; 4492 PetscInt neigs; 4493 const char *prefix; 4494 4495 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4496 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4497 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4498 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4499 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4500 if (ispreonly) { 4501 check_ksp_type = KSPPREONLY; 4502 compute_eigs = PETSC_FALSE; 4503 } else { 4504 check_ksp_type = KSPGMRES; 4505 compute_eigs = PETSC_TRUE; 4506 } 4507 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4508 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4509 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4510 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4511 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4512 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4513 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4514 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4515 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4516 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4517 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4518 /* create random vec */ 4519 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4520 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4521 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4522 if (CoarseNullSpace) { 4523 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4524 } 4525 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4526 /* solve coarse problem */ 4527 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4528 if (CoarseNullSpace) { 4529 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4530 } 4531 /* set eigenvalue estimation if preonly has not been requested */ 4532 if (compute_eigs) { 4533 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4534 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4535 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4536 lambda_max = eigs_r[neigs-1]; 4537 lambda_min = eigs_r[0]; 4538 if (pcbddc->use_coarse_estimates) { 4539 if (lambda_max>lambda_min) { 4540 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4541 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4542 } 4543 } 4544 } 4545 4546 /* check coarse problem residual error */ 4547 if (pcbddc->dbg_flag) { 4548 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4549 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4550 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4551 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4552 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4553 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4554 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4555 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4556 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4557 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4558 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4559 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4560 if (compute_eigs) { 4561 PetscReal lambda_max_s,lambda_min_s; 4562 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4563 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4564 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4565 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4566 for (i=0;i<neigs;i++) { 4567 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4568 } 4569 } 4570 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4571 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4572 } 4573 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4574 if (compute_eigs) { 4575 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4576 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4577 } 4578 } 4579 } 4580 /* print additional info */ 4581 if (pcbddc->dbg_flag) { 4582 /* waits until all processes reaches this point */ 4583 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4584 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4585 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4586 } 4587 4588 /* free memory */ 4589 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4590 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4591 PetscFunctionReturn(0); 4592 } 4593 4594 #undef __FUNCT__ 4595 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4596 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4597 { 4598 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4599 PC_IS* pcis = (PC_IS*)pc->data; 4600 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4601 IS subset,subset_mult,subset_n; 4602 PetscInt local_size,coarse_size=0; 4603 PetscInt *local_primal_indices=NULL; 4604 const PetscInt *t_local_primal_indices; 4605 PetscErrorCode ierr; 4606 4607 PetscFunctionBegin; 4608 /* Compute global number of coarse dofs */ 4609 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4610 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4611 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4612 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4613 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4614 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4615 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4616 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4617 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4618 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 4619 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4620 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4621 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4622 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4623 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4624 4625 /* check numbering */ 4626 if (pcbddc->dbg_flag) { 4627 PetscScalar coarsesum,*array; 4628 PetscInt i; 4629 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4630 4631 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4632 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4633 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4634 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4635 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4636 for (i=0;i<pcbddc->local_primal_size;i++) { 4637 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4638 } 4639 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4640 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4641 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4642 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4643 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4644 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4645 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4646 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4647 for (i=0;i<pcis->n;i++) { 4648 if (array[i] == 1.0) { 4649 set_error = PETSC_TRUE; 4650 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4651 } 4652 } 4653 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4654 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4655 for (i=0;i<pcis->n;i++) { 4656 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4657 } 4658 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4659 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4660 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4661 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4662 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4663 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4664 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4665 PetscInt *gidxs; 4666 4667 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4668 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4669 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4670 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4671 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4672 for (i=0;i<pcbddc->local_primal_size;i++) { 4673 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4674 } 4675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4676 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4677 } 4678 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4679 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4680 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4681 } 4682 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4683 /* get back data */ 4684 *coarse_size_n = coarse_size; 4685 *local_primal_indices_n = local_primal_indices; 4686 PetscFunctionReturn(0); 4687 } 4688 4689 #undef __FUNCT__ 4690 #define __FUNCT__ "PCBDDCGlobalToLocal" 4691 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4692 { 4693 IS localis_t; 4694 PetscInt i,lsize,*idxs,n; 4695 PetscScalar *vals; 4696 PetscErrorCode ierr; 4697 4698 PetscFunctionBegin; 4699 /* get indices in local ordering exploiting local to global map */ 4700 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4701 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4702 for (i=0;i<lsize;i++) vals[i] = 1.0; 4703 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4704 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4705 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4706 if (idxs) { /* multilevel guard */ 4707 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4708 } 4709 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4710 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4711 ierr = PetscFree(vals);CHKERRQ(ierr); 4712 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4713 /* now compute set in local ordering */ 4714 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4715 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4716 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4717 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4718 for (i=0,lsize=0;i<n;i++) { 4719 if (PetscRealPart(vals[i]) > 0.5) { 4720 lsize++; 4721 } 4722 } 4723 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4724 for (i=0,lsize=0;i<n;i++) { 4725 if (PetscRealPart(vals[i]) > 0.5) { 4726 idxs[lsize++] = i; 4727 } 4728 } 4729 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4730 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4731 *localis = localis_t; 4732 PetscFunctionReturn(0); 4733 } 4734 4735 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4736 #undef __FUNCT__ 4737 #define __FUNCT__ "PCBDDCMatMult_Private" 4738 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4739 { 4740 PCBDDCChange_ctx change_ctx; 4741 PetscErrorCode ierr; 4742 4743 PetscFunctionBegin; 4744 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4745 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4746 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4747 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4748 PetscFunctionReturn(0); 4749 } 4750 4751 #undef __FUNCT__ 4752 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4753 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4754 { 4755 PCBDDCChange_ctx change_ctx; 4756 PetscErrorCode ierr; 4757 4758 PetscFunctionBegin; 4759 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4760 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4761 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4762 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4763 PetscFunctionReturn(0); 4764 } 4765 4766 #undef __FUNCT__ 4767 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4768 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4769 { 4770 PC_IS *pcis=(PC_IS*)pc->data; 4771 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4772 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4773 Mat S_j; 4774 PetscInt *used_xadj,*used_adjncy; 4775 PetscBool free_used_adj; 4776 PetscErrorCode ierr; 4777 4778 PetscFunctionBegin; 4779 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4780 free_used_adj = PETSC_FALSE; 4781 if (pcbddc->sub_schurs_layers == -1) { 4782 used_xadj = NULL; 4783 used_adjncy = NULL; 4784 } else { 4785 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4786 used_xadj = pcbddc->mat_graph->xadj; 4787 used_adjncy = pcbddc->mat_graph->adjncy; 4788 } else if (pcbddc->computed_rowadj) { 4789 used_xadj = pcbddc->mat_graph->xadj; 4790 used_adjncy = pcbddc->mat_graph->adjncy; 4791 } else { 4792 PetscBool flg_row=PETSC_FALSE; 4793 const PetscInt *xadj,*adjncy; 4794 PetscInt nvtxs; 4795 4796 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4797 if (flg_row) { 4798 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4799 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4800 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4801 free_used_adj = PETSC_TRUE; 4802 } else { 4803 pcbddc->sub_schurs_layers = -1; 4804 used_xadj = NULL; 4805 used_adjncy = NULL; 4806 } 4807 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4808 } 4809 } 4810 4811 /* setup sub_schurs data */ 4812 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4813 if (!sub_schurs->use_mumps) { 4814 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4815 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4816 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4817 } else { 4818 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4819 PetscBool isseqaij; 4820 if (!pcbddc->use_vertices && reuse_solvers) { 4821 PetscInt n_vertices; 4822 4823 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4824 reuse_solvers = (PetscBool)!n_vertices; 4825 } 4826 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4827 if (!isseqaij) { 4828 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4829 if (matis->A == pcbddc->local_mat) { 4830 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4831 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4832 } else { 4833 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4834 } 4835 } 4836 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4837 } 4838 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4839 4840 /* free adjacency */ 4841 if (free_used_adj) { 4842 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4843 } 4844 PetscFunctionReturn(0); 4845 } 4846 4847 #undef __FUNCT__ 4848 #define __FUNCT__ "PCBDDCInitSubSchurs" 4849 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4850 { 4851 PC_IS *pcis=(PC_IS*)pc->data; 4852 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4853 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4854 PCBDDCGraph graph; 4855 PetscErrorCode ierr; 4856 4857 PetscFunctionBegin; 4858 /* attach interface graph for determining subsets */ 4859 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4860 IS verticesIS,verticescomm; 4861 PetscInt vsize,*idxs; 4862 4863 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4864 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4865 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4866 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4867 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4868 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4869 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4870 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4871 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4872 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4873 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4874 /* 4875 if (pcbddc->dbg_flag) { 4876 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4877 } 4878 */ 4879 } else { 4880 graph = pcbddc->mat_graph; 4881 } 4882 4883 /* sub_schurs init */ 4884 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4885 4886 /* free graph struct */ 4887 if (pcbddc->sub_schurs_rebuild) { 4888 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4889 } 4890 PetscFunctionReturn(0); 4891 } 4892