1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 111 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 112 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 118 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 119 { 120 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 121 PC_IS *pcis = (PC_IS*)pc->data; 122 VecType impVecType; 123 PetscInt n_constraints,n_R,old_size; 124 PetscErrorCode ierr; 125 126 PetscFunctionBegin; 127 if (!pcbddc->ConstraintMatrix) { 128 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 129 } 130 /* get sizes */ 131 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 132 n_R = pcis->n-pcbddc->n_actual_vertices; 133 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 134 /* local work vectors (try to avoid unneeded work)*/ 135 /* R nodes */ 136 old_size = -1; 137 if (pcbddc->vec1_R) { 138 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 139 } 140 if (n_R != old_size) { 141 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 143 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 144 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 145 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 146 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 147 } 148 /* local primal dofs */ 149 old_size = -1; 150 if (pcbddc->vec1_P) { 151 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 152 } 153 if (pcbddc->local_primal_size != old_size) { 154 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 156 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 157 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 158 } 159 /* local explicit constraints */ 160 old_size = -1; 161 if (pcbddc->vec1_C) { 162 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 163 } 164 if (n_constraints && n_constraints != old_size) { 165 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 167 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 168 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "PCBDDCSetUpCorrection" 175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 176 { 177 PetscErrorCode ierr; 178 /* pointers to pcis and pcbddc */ 179 PC_IS* pcis = (PC_IS*)pc->data; 180 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 181 /* submatrices of local problem */ 182 Mat A_RV,A_VR,A_VV; 183 /* working matrices */ 184 Mat M1,M2,M3,C_CR; 185 /* working vectors */ 186 Vec vec1_C,vec2_C,vec1_V,vec2_V; 187 /* additional working stuff */ 188 IS is_aux; 189 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 190 const PetscScalar *array,*row_cmat_values; 191 const PetscInt *row_cmat_indices,*idx_R_local; 192 PetscInt *idx_V_B,*auxindices; 193 PetscInt n_vertices,n_constraints,size_of_constraint; 194 PetscInt i,j,n_R,n_D,n_B; 195 PetscBool unsymmetric_check; 196 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 197 MatType impMatType; 198 /* some shortcuts to scalars */ 199 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 200 /* for debugging purposes */ 201 PetscReal *coarsefunctions_errors,*constraints_errors; 202 203 PetscFunctionBegin; 204 /* get number of vertices (corners plus constraints with change of basis) 205 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 206 n_vertices = pcbddc->n_actual_vertices; 207 n_constraints = pcbddc->local_primal_size-n_vertices; 208 /* Set Non-overlapping dimensions */ 209 n_B = pcis->n_B; n_D = pcis->n - n_B; 210 n_R = pcis->n-n_vertices; 211 212 /* Set types for local objects needed by BDDC precondtioner */ 213 impMatType = MATSEQDENSE; 214 215 /* Allocating some extra storage just to be safe */ 216 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 217 for (i=0;i<pcis->n;i++) auxindices[i]=i; 218 219 /* vertices in boundary numbering */ 220 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 221 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 222 if (i != n_vertices) { 223 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 224 } 225 226 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 227 if (n_constraints) { 228 /* see if we can save some allocations */ 229 if (pcbddc->local_auxmat2) { 230 PetscInt on_R,on_constraints; 231 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 232 if (on_R != n_R || on_constraints != n_constraints) { 233 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 234 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 235 } 236 } 237 /* work vectors */ 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 239 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 240 /* auxiliary matrices */ 241 if (!pcbddc->local_auxmat2) { 242 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 243 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 244 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 245 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 246 } 247 248 /* Extract constraints on R nodes: C_{CR} */ 249 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 250 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 251 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 252 253 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 254 for (i=0;i<n_constraints;i++) { 255 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 256 /* Get row of constraint matrix in R numbering */ 257 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 259 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 260 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 261 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 262 /* Solve for row of constraint matrix in R numbering */ 263 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 264 /* Set values in local_auxmat2 */ 265 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 267 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 268 } 269 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 272 273 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 274 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 275 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 276 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 277 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 278 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 279 ierr = MatSetUp(M1);CHKERRQ(ierr); 280 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 281 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 282 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 283 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 284 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 285 ierr = MatDestroy(&M2);CHKERRQ(ierr); 286 ierr = MatDestroy(&M3);CHKERRQ(ierr); 287 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 288 if (!pcbddc->local_auxmat1) { 289 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } else { 291 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 292 } 293 } 294 295 /* Get submatrices from subdomain matrix */ 296 if (n_vertices) { 297 PetscInt ibs,mbs; 298 PetscBool issbaij; 299 Mat newmat; 300 301 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 302 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 303 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 304 if (ibs != mbs) { /* need to convert to SEQAIJ */ 305 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 308 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 309 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 310 } else { 311 /* this is safe */ 312 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 313 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 314 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 315 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 316 /* which of the two approaches is faster? */ 317 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 319 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 320 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 321 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 322 } else { 323 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 324 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 325 } 326 } 327 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 328 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 329 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 330 } 331 332 /* Matrix of coarse basis functions (local) */ 333 if (pcbddc->coarse_phi_B) { 334 PetscInt on_B,on_primal; 335 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 336 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 337 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 338 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 339 } 340 } 341 if (pcbddc->coarse_phi_D) { 342 PetscInt on_D,on_primal; 343 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 344 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 345 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 346 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 347 } 348 } 349 if (!pcbddc->coarse_phi_B) { 350 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 351 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 352 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 353 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 } 355 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 356 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 357 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 358 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 359 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 } 361 362 if (pcbddc->dbg_flag) { 363 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 364 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 365 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 366 } 367 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 368 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 369 370 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 371 372 /* vertices */ 373 for (i=0;i<n_vertices;i++) { 374 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 375 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 376 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 377 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 378 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 379 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 380 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 381 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 383 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 384 if (n_constraints) { 385 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 386 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 387 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 388 } 389 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 390 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 391 392 /* Set values in coarse basis function and subdomain part of coarse_mat */ 393 /* coarse basis functions */ 394 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 395 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 399 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 400 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 401 if (pcbddc->switch_static || pcbddc->dbg_flag) { 402 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 406 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 407 } 408 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 409 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 412 if (n_constraints) { 413 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 416 } 417 418 /* check */ 419 if (pcbddc->dbg_flag) { 420 /* assemble subdomain vector on local nodes */ 421 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 422 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 425 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 426 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 427 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 428 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 429 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 430 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 433 if (n_constraints) { 434 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 437 } 438 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 441 /* check saddle point solution */ 442 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 444 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 445 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 446 /* shift by the identity matrix */ 447 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 448 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 451 } 452 } 453 454 /* constraints */ 455 for (i=0;i<n_constraints;i++) { 456 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 457 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 458 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 459 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 460 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 461 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 462 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 463 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 464 if (n_vertices) { 465 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 466 } 467 /* Set values in coarse basis function and subdomain part of coarse_mat */ 468 /* coarse basis functions */ 469 j = i+n_vertices; /* don't touch this! */ 470 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 471 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 473 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 475 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 476 if (pcbddc->switch_static || pcbddc->dbg_flag) { 477 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 481 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 482 } 483 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 484 if (n_vertices) { 485 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 487 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 488 } 489 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 492 493 if (pcbddc->dbg_flag) { 494 /* assemble subdomain vector on nodes */ 495 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 496 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 498 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 499 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 500 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 501 /* assemble subdomain vector of lagrange multipliers */ 502 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 503 if (n_vertices) { 504 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 506 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 507 } 508 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 510 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 511 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 513 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 514 /* check saddle point solution */ 515 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 517 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 518 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 519 /* shift by the identity matrix */ 520 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 521 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 523 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 524 } 525 } 526 /* call assembling routines for local coarse basis */ 527 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 if (pcbddc->switch_static || pcbddc->dbg_flag) { 530 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 } 533 534 /* compute other basis functions for non-symmetric problems */ 535 /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/ 536 { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */ 537 PetscBool setsym; 538 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr); 539 if (!setsym) pcbddc->issym = PETSC_FALSE; 540 } 541 542 if (!pcbddc->issym) { 543 if (!pcbddc->coarse_psi_B) { 544 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 545 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 546 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 547 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 548 } 549 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 550 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 551 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 552 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 553 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 554 } 555 for (i=0;i<pcbddc->local_primal_size;i++) { 556 if (n_constraints) { 557 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 558 for (j=0;j<n_constraints;j++) { 559 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 560 } 561 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 562 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 563 } 564 if (i<n_vertices) { 565 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 566 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 567 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 568 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 569 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 570 if (n_constraints) { 571 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 572 } 573 } else { 574 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 575 } 576 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 577 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 578 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 579 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 580 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 581 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 582 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 583 if (i<n_vertices) { 584 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 585 } 586 if (pcbddc->switch_static || pcbddc->dbg_flag) { 587 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 588 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 589 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 590 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 591 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 592 } 593 594 if (pcbddc->dbg_flag) { 595 /* assemble subdomain vector on nodes */ 596 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 597 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 598 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 599 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 600 if (i<n_vertices) { 601 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 602 } 603 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 604 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 605 /* assemble subdomain vector of lagrange multipliers */ 606 for (j=0;j<pcbddc->local_primal_size;j++) { 607 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 608 } 609 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 610 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 611 /* check saddle point solution */ 612 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 613 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 614 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 615 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 616 /* shift by the identity matrix */ 617 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 618 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 619 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 620 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 621 } 622 } 623 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 624 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 625 if (pcbddc->switch_static || pcbddc->dbg_flag) { 626 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 627 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 628 } 629 unsymmetric_check = PETSC_TRUE; 630 } else { /* take references to already computed coarse basis */ 631 unsymmetric_check = PETSC_FALSE; 632 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 633 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 634 if (pcbddc->coarse_phi_D) { 635 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 636 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 637 } 638 } 639 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 640 /* Checking coarse_sub_mat and coarse basis functios */ 641 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 642 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 643 if (pcbddc->dbg_flag) { 644 Mat coarse_sub_mat; 645 Mat AUXMAT,TM1,TM2,TM3,TM4; 646 Mat coarse_phi_D,coarse_phi_B; 647 Mat coarse_psi_D,coarse_psi_B; 648 Mat A_II,A_BB,A_IB,A_BI; 649 MatType checkmattype=MATSEQAIJ; 650 PetscReal real_value; 651 652 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 653 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 654 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 655 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 656 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 657 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 658 if (unsymmetric_check) { 659 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 660 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 661 } 662 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 663 664 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 665 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 666 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 667 if (unsymmetric_check) { 668 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 669 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 670 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 671 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 672 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 674 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 675 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 676 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 677 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 } else { 681 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 682 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 683 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 684 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 685 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 686 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 687 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 688 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 689 } 690 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 691 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 692 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 693 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 694 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 695 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 696 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 701 for (i=0;i<pcbddc->local_primal_size;i++) { 702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 703 } 704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 705 for (i=0;i<pcbddc->local_primal_size;i++) { 706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 707 } 708 if (unsymmetric_check) { 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 710 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 711 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 712 } 713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 714 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 715 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 716 } 717 } 718 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 719 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 720 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 721 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 722 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 723 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 724 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 725 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 726 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 727 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 728 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 729 if (unsymmetric_check) { 730 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 731 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 732 } 733 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 734 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 735 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 736 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 737 } 738 /* free memory */ 739 if (n_vertices) { 740 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 741 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 742 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 743 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 744 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 745 } 746 if (n_constraints) { 747 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 748 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 749 ierr = MatDestroy(&M1);CHKERRQ(ierr); 750 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 751 } 752 ierr = PetscFree(auxindices);CHKERRQ(ierr); 753 /* get back data */ 754 *coarse_submat_vals_n = coarse_submat_vals; 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 760 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 761 { 762 PC_IS* pcis = (PC_IS*)(pc->data); 763 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 764 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 765 PetscBool issbaij,isseqaij; 766 /* manage repeated solves */ 767 MatReuse reuse; 768 PetscErrorCode ierr; 769 770 PetscFunctionBegin; 771 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 772 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 773 } 774 /* get mat flags */ 775 reuse = MAT_INITIAL_MATRIX; 776 if (pc->setupcalled) { 777 if (pc->flag == SAME_NONZERO_PATTERN) { 778 reuse = MAT_REUSE_MATRIX; 779 } else { 780 reuse = MAT_INITIAL_MATRIX; 781 } 782 } 783 if (reuse == MAT_INITIAL_MATRIX) { 784 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 785 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 786 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 787 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 788 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 789 } 790 791 /* transform local matrices if needed */ 792 if (pcbddc->ChangeOfBasisMatrix) { 793 Mat change_mat_all; 794 PetscScalar *row_cmat_values; 795 PetscInt *row_cmat_indices; 796 PetscInt *nnz,*is_indices,*temp_indices; 797 PetscInt i,j,k,n_D,n_B; 798 799 /* Get Non-overlapping dimensions */ 800 n_B = pcis->n_B; 801 n_D = pcis->n-n_B; 802 803 /* compute nonzero structure of change of basis on all local nodes */ 804 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 805 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 806 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 807 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 808 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 809 k=1; 810 for (i=0;i<n_B;i++) { 811 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 812 nnz[is_indices[i]]=j; 813 if (k < j) k = j; 814 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 815 } 816 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 817 /* assemble change of basis matrix on the whole set of local dofs */ 818 ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr); 819 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 820 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 821 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 822 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 823 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 824 for (i=0;i<n_D;i++) { 825 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 826 } 827 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 828 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 829 for (i=0;i<n_B;i++) { 830 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 831 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 832 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 833 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 834 } 835 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 836 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 837 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 838 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 839 if (isseqaij) { 840 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 841 } else { 842 Mat work_mat; 843 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 844 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 845 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 846 } 847 /* 848 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 849 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 850 */ 851 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 852 ierr = PetscFree(nnz);CHKERRQ(ierr); 853 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 854 } else { 855 /* without change of basis, the local matrix is unchanged */ 856 if (!pcbddc->local_mat) { 857 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 858 pcbddc->local_mat = matis->A; 859 } 860 } 861 862 /* get submatrices */ 863 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 864 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 865 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 866 if (!issbaij) { 867 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 868 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 869 } else { 870 Mat newmat; 871 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 872 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 873 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 874 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 875 } 876 PetscFunctionReturn(0); 877 } 878 879 #undef __FUNCT__ 880 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 881 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 882 { 883 PC_IS* pcis = (PC_IS*)(pc->data); 884 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 885 IS is_aux1,is_aux2; 886 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 887 PetscInt n_vertices,i,j,n_R,n_D,n_B; 888 PetscInt vbs,bs; 889 PetscBT bitmask; 890 PetscErrorCode ierr; 891 892 PetscFunctionBegin; 893 /* 894 No need to setup local scatters if 895 - primal space is unchanged 896 AND 897 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 898 AND 899 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 900 */ 901 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 902 PetscFunctionReturn(0); 903 } 904 /* destroy old objects */ 905 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 906 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 907 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 908 /* Set Non-overlapping dimensions */ 909 n_B = pcis->n_B; n_D = pcis->n - n_B; 910 n_vertices = pcbddc->n_actual_vertices; 911 /* create auxiliary bitmask */ 912 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 913 for (i=0;i<n_vertices;i++) { 914 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 915 } 916 917 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 918 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 919 for (i=0, n_R=0; i<pcis->n; i++) { 920 if (!PetscBTLookup(bitmask,i)) { 921 idx_R_local[n_R] = i; 922 n_R++; 923 } 924 } 925 926 /* Block code */ 927 vbs = 1; 928 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 929 if (bs>1 && !(n_vertices%bs)) { 930 PetscBool is_blocked = PETSC_TRUE; 931 PetscInt *vary; 932 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 933 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 934 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 935 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 936 for (i=0; i<n_vertices; i++) { 937 if (vary[i]!=0 && vary[i]!=bs) { 938 is_blocked = PETSC_FALSE; 939 break; 940 } 941 } 942 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 943 vbs = bs; 944 for (i=0;i<n_R/vbs;i++) { 945 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 946 } 947 } 948 ierr = PetscFree(vary);CHKERRQ(ierr); 949 } 950 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 951 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 952 953 /* print some info if requested */ 954 if (pcbddc->dbg_flag) { 955 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 956 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 957 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 958 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 959 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 960 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 961 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 962 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 963 } 964 965 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 966 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 967 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 968 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 969 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 970 for (i=0; i<n_D; i++) { 971 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 972 } 973 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 974 for (i=0, j=0; i<n_R; i++) { 975 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 976 aux_array1[j++] = i; 977 } 978 } 979 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 980 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 981 for (i=0, j=0; i<n_B; i++) { 982 if (!PetscBTLookup(bitmask,is_indices[i])) { 983 aux_array2[j++] = i; 984 } 985 } 986 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 987 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 988 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 989 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 990 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 991 992 if (pcbddc->switch_static || pcbddc->dbg_flag) { 993 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 994 for (i=0, j=0; i<n_R; i++) { 995 if (PetscBTLookup(bitmask,idx_R_local[i])) { 996 aux_array1[j++] = i; 997 } 998 } 999 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1000 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1001 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1002 } 1003 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1004 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1011 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1012 { 1013 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1014 PC_IS *pcis = (PC_IS*)pc->data; 1015 PC pc_temp; 1016 Mat A_RR; 1017 MatReuse reuse; 1018 PetscScalar m_one = -1.0; 1019 PetscReal value; 1020 PetscInt n_D,n_R,ibs,mbs; 1021 PetscBool use_exact,use_exact_reduced,issbaij; 1022 PetscErrorCode ierr; 1023 /* prefixes stuff */ 1024 char dir_prefix[256],neu_prefix[256],str_level[16]; 1025 size_t len; 1026 1027 PetscFunctionBegin; 1028 1029 /* compute prefixes */ 1030 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1031 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1032 if (!pcbddc->current_level) { 1033 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1034 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1035 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1036 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1037 } else { 1038 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1039 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1040 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1041 len -= 15; /* remove "pc_bddc_coarse_" */ 1042 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1043 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1044 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1045 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1046 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1047 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1048 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1049 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1050 } 1051 1052 /* DIRICHLET PROBLEM */ 1053 /* Matrix for Dirichlet problem is pcis->A_II */ 1054 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1055 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1056 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1057 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1058 /* default */ 1059 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1060 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1061 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1062 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1063 if (issbaij) { 1064 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1065 } else { 1066 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1067 } 1068 /* Allow user's customization */ 1069 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1070 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1071 } 1072 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1073 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1074 if (!n_D) { 1075 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1076 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1077 } 1078 /* Set Up KSP for Dirichlet problem of BDDC */ 1079 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1080 /* set ksp_D into pcis data */ 1081 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1082 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1083 pcis->ksp_D = pcbddc->ksp_D; 1084 1085 /* NEUMANN PROBLEM */ 1086 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1087 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1088 if (pcbddc->ksp_R) { /* already created ksp */ 1089 PetscInt nn_R; 1090 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1091 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1092 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1093 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1094 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1095 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1096 reuse = MAT_INITIAL_MATRIX; 1097 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1098 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1099 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1100 reuse = MAT_INITIAL_MATRIX; 1101 } else { /* safe to reuse the matrix */ 1102 reuse = MAT_REUSE_MATRIX; 1103 } 1104 } 1105 /* last check */ 1106 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1107 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1108 reuse = MAT_INITIAL_MATRIX; 1109 } 1110 } else { /* first time, so we need to create the matrix */ 1111 reuse = MAT_INITIAL_MATRIX; 1112 } 1113 /* extract A_RR */ 1114 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1115 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1116 if (ibs != mbs) { 1117 Mat newmat; 1118 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1119 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1120 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1121 } else { 1122 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1123 } 1124 if (!pcbddc->ksp_R) { /* create object if not present */ 1125 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1126 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1127 /* default */ 1128 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1129 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1130 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1131 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1132 if (issbaij) { 1133 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1134 } else { 1135 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1136 } 1137 /* Allow user's customization */ 1138 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1139 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1140 } 1141 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1142 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1143 if (!n_R) { 1144 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1145 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1146 } 1147 /* Set Up KSP for Neumann problem of BDDC */ 1148 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1149 1150 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1151 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1152 /* Dirichlet */ 1153 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1154 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1155 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1156 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1157 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1158 /* need to be adapted? */ 1159 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1160 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1161 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1162 /* print info */ 1163 if (pcbddc->dbg_flag) { 1164 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1165 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1166 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1168 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1169 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1170 } 1171 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1172 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1173 } 1174 1175 /* Neumann */ 1176 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1177 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1178 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1179 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1180 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1181 /* need to be adapted? */ 1182 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1183 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1184 /* print info */ 1185 if (pcbddc->dbg_flag) { 1186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1187 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1188 } 1189 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1190 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1191 } 1192 } 1193 /* free Neumann problem's matrix */ 1194 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1200 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1201 { 1202 PetscErrorCode ierr; 1203 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1204 1205 PetscFunctionBegin; 1206 if (applytranspose) { 1207 if (pcbddc->local_auxmat1) { 1208 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1209 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1210 } 1211 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1212 } else { 1213 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1214 if (pcbddc->local_auxmat1) { 1215 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1216 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1217 } 1218 } 1219 PetscFunctionReturn(0); 1220 } 1221 1222 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1223 #undef __FUNCT__ 1224 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1225 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1226 { 1227 PetscErrorCode ierr; 1228 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1229 PC_IS* pcis = (PC_IS*) (pc->data); 1230 const PetscScalar zero = 0.0; 1231 1232 PetscFunctionBegin; 1233 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1234 if (applytranspose) { 1235 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1236 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1237 } else { 1238 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1239 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1240 } 1241 /* start communications from local primal nodes to rhs of coarse solver */ 1242 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1243 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 1246 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1247 /* TODO remove null space when doing multilevel */ 1248 if (pcbddc->coarse_ksp) { 1249 if (applytranspose) { 1250 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1251 } else { 1252 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1253 } 1254 } 1255 /* start communications from coarse solver solution to local primal nodes */ 1256 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1257 1258 /* Local solution on R nodes */ 1259 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1260 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1261 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1262 if (pcbddc->switch_static) { 1263 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1264 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1265 } 1266 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1267 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1268 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 if (pcbddc->switch_static) { 1271 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1272 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1273 } 1274 1275 /* complete communications from coarse sol to local primal nodes */ 1276 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1277 1278 /* Sum contributions from two levels */ 1279 if (applytranspose) { 1280 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1281 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1282 } else { 1283 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1284 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1285 } 1286 PetscFunctionReturn(0); 1287 } 1288 1289 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1292 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1293 { 1294 PetscErrorCode ierr; 1295 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1296 PetscScalar *array,*array2; 1297 Vec from,to; 1298 1299 PetscFunctionBegin; 1300 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1301 from = pcbddc->coarse_vec; 1302 to = pcbddc->vec1_P; 1303 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1304 Vec tvec; 1305 PetscInt lsize; 1306 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1307 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1308 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1309 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1310 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1311 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1312 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1313 } 1314 } else { /* from local to global -> put data in coarse right hand side */ 1315 from = pcbddc->vec1_P; 1316 to = pcbddc->coarse_vec; 1317 } 1318 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1324 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1325 { 1326 PetscErrorCode ierr; 1327 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1328 PetscScalar *array,*array2; 1329 Vec from,to; 1330 1331 PetscFunctionBegin; 1332 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1333 from = pcbddc->coarse_vec; 1334 to = pcbddc->vec1_P; 1335 } else { /* from local to global -> put data in coarse right hand side */ 1336 from = pcbddc->vec1_P; 1337 to = pcbddc->coarse_vec; 1338 } 1339 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1340 if (smode == SCATTER_FORWARD) { 1341 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1342 Vec tvec; 1343 PetscInt lsize; 1344 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1345 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1346 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1347 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1348 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1349 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1350 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1351 } 1352 } 1353 PetscFunctionReturn(0); 1354 } 1355 1356 /* uncomment for testing purposes */ 1357 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1360 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1361 { 1362 PetscErrorCode ierr; 1363 PC_IS* pcis = (PC_IS*)(pc->data); 1364 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1365 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1366 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1367 MatType impMatType=MATSEQAIJ; 1368 /* one and zero */ 1369 PetscScalar one=1.0,zero=0.0; 1370 /* space to store constraints and their local indices */ 1371 PetscScalar *temp_quadrature_constraint; 1372 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1373 /* iterators */ 1374 PetscInt i,j,k,total_counts,temp_start_ptr; 1375 /* stuff to store connected components stored in pcbddc->mat_graph */ 1376 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1377 PetscInt n_ISForFaces,n_ISForEdges; 1378 /* near null space stuff */ 1379 MatNullSpace nearnullsp; 1380 const Vec *nearnullvecs; 1381 Vec *localnearnullsp; 1382 PetscBool nnsp_has_cnst; 1383 PetscInt nnsp_size; 1384 PetscScalar *array; 1385 /* BLAS integers */ 1386 PetscBLASInt lwork,lierr; 1387 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1388 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1389 /* LAPACK working arrays for SVD or POD */ 1390 PetscBool skip_lapack; 1391 PetscScalar *work; 1392 PetscReal *singular_vals; 1393 #if defined(PETSC_USE_COMPLEX) 1394 PetscReal *rwork; 1395 #endif 1396 #if defined(PETSC_MISSING_LAPACK_GESVD) 1397 PetscBLASInt Blas_one_2=1; 1398 PetscScalar *temp_basis,*correlation_mat; 1399 #else 1400 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1401 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1402 #endif 1403 /* reuse */ 1404 PetscInt olocal_primal_size; 1405 PetscInt *oprimal_indices_local_idxs; 1406 /* change of basis */ 1407 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1408 PetscBool boolforchange,qr_needed; 1409 PetscBT touched,change_basis,qr_needed_idx; 1410 /* auxiliary stuff */ 1411 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1412 /* some quantities */ 1413 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1414 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1415 1416 1417 PetscFunctionBegin; 1418 /* Destroy Mat objects computed previously */ 1419 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1420 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1421 /* Get index sets for faces, edges and vertices from graph */ 1422 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1423 /* free unneeded index sets */ 1424 if (!pcbddc->use_vertices) { 1425 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1426 } 1427 if (!pcbddc->use_edges) { 1428 for (i=0;i<n_ISForEdges;i++) { 1429 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1430 } 1431 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1432 n_ISForEdges = 0; 1433 } 1434 if (!pcbddc->use_faces) { 1435 for (i=0;i<n_ISForFaces;i++) { 1436 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1437 } 1438 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1439 n_ISForFaces = 0; 1440 } 1441 /* HACKS (the following two blocks of code) */ 1442 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1443 pcbddc->use_change_of_basis = PETSC_TRUE; 1444 if (!ISForEdges) { 1445 pcbddc->use_change_on_faces = PETSC_TRUE; 1446 } 1447 } 1448 if (pcbddc->NullSpace) { 1449 /* use_change_of_basis should be consistent among processors */ 1450 PetscBool tbool[2],gbool[2]; 1451 tbool [0] = pcbddc->use_change_of_basis; 1452 tbool [1] = pcbddc->use_change_on_faces; 1453 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1454 pcbddc->use_change_of_basis = gbool[0]; 1455 pcbddc->use_change_on_faces = gbool[1]; 1456 } 1457 /* print some info */ 1458 if (pcbddc->dbg_flag) { 1459 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1461 i = 0; 1462 if (ISForVertices) { 1463 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1464 } 1465 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1468 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1469 } 1470 /* check if near null space is attached to global mat */ 1471 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1472 if (nearnullsp) { 1473 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1474 /* remove any stored info */ 1475 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1476 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1477 /* store information for BDDC solver reuse */ 1478 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1479 pcbddc->onearnullspace = nearnullsp; 1480 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1481 for (i=0;i<nnsp_size;i++) { 1482 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1483 } 1484 } else { /* if near null space is not provided BDDC uses constants by default */ 1485 nnsp_size = 0; 1486 nnsp_has_cnst = PETSC_TRUE; 1487 } 1488 /* get max number of constraints on a single cc */ 1489 max_constraints = nnsp_size; 1490 if (nnsp_has_cnst) max_constraints++; 1491 1492 /* 1493 Evaluate maximum storage size needed by the procedure 1494 - temp_indices will contain start index of each constraint stored as follows 1495 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1496 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1497 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1498 */ 1499 total_counts = n_ISForFaces+n_ISForEdges; 1500 total_counts *= max_constraints; 1501 n_vertices = 0; 1502 if (ISForVertices) { 1503 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1504 } 1505 total_counts += n_vertices; 1506 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1507 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1508 total_counts = 0; 1509 max_size_of_constraint = 0; 1510 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1511 if (i<n_ISForEdges) { 1512 used_IS = &ISForEdges[i]; 1513 } else { 1514 used_IS = &ISForFaces[i-n_ISForEdges]; 1515 } 1516 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1517 total_counts += j; 1518 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1519 } 1520 total_counts *= max_constraints; 1521 total_counts += n_vertices; 1522 ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr); 1523 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr); 1524 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1525 /* get local part of global near null space vectors */ 1526 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1527 for (k=0;k<nnsp_size;k++) { 1528 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1529 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1530 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1531 } 1532 1533 /* whether or not to skip lapack calls */ 1534 skip_lapack = PETSC_TRUE; 1535 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1536 1537 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1538 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1539 PetscScalar temp_work; 1540 #if defined(PETSC_MISSING_LAPACK_GESVD) 1541 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1542 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1543 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1544 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1545 #if defined(PETSC_USE_COMPLEX) 1546 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1547 #endif 1548 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1549 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1550 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1551 lwork = -1; 1552 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1553 #if !defined(PETSC_USE_COMPLEX) 1554 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1555 #else 1556 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1557 #endif 1558 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1559 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1560 #else /* on missing GESVD */ 1561 /* SVD */ 1562 PetscInt max_n,min_n; 1563 max_n = max_size_of_constraint; 1564 min_n = max_constraints; 1565 if (max_size_of_constraint < max_constraints) { 1566 min_n = max_size_of_constraint; 1567 max_n = max_constraints; 1568 } 1569 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1570 #if defined(PETSC_USE_COMPLEX) 1571 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1572 #endif 1573 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1574 lwork = -1; 1575 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1576 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1577 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1578 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1579 #if !defined(PETSC_USE_COMPLEX) 1580 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1581 #else 1582 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1583 #endif 1584 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1585 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1586 #endif /* on missing GESVD */ 1587 /* Allocate optimal workspace */ 1588 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1589 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1590 } 1591 /* Now we can loop on constraining sets */ 1592 total_counts = 0; 1593 temp_indices[0] = 0; 1594 /* vertices */ 1595 if (ISForVertices) { 1596 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1597 if (nnsp_has_cnst) { /* consider all vertices */ 1598 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1599 for (i=0;i<n_vertices;i++) { 1600 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1601 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1602 total_counts++; 1603 } 1604 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1605 PetscBool used_vertex; 1606 for (i=0;i<n_vertices;i++) { 1607 used_vertex = PETSC_FALSE; 1608 k = 0; 1609 while (!used_vertex && k<nnsp_size) { 1610 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1611 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1612 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1613 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1614 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1615 total_counts++; 1616 used_vertex = PETSC_TRUE; 1617 } 1618 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1619 k++; 1620 } 1621 } 1622 } 1623 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1624 n_vertices = total_counts; 1625 } 1626 1627 /* edges and faces */ 1628 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1629 if (i<n_ISForEdges) { 1630 used_IS = &ISForEdges[i]; 1631 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1632 } else { 1633 used_IS = &ISForFaces[i-n_ISForEdges]; 1634 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1635 } 1636 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1637 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1638 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1639 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1640 /* change of basis should not be performed on local periodic nodes */ 1641 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1642 if (nnsp_has_cnst) { 1643 PetscScalar quad_value; 1644 temp_constraints++; 1645 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1646 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1647 for (j=0;j<size_of_constraint;j++) { 1648 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1649 } 1650 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1651 total_counts++; 1652 } 1653 for (k=0;k<nnsp_size;k++) { 1654 PetscReal real_value; 1655 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1656 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1657 for (j=0;j<size_of_constraint;j++) { 1658 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1659 } 1660 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1661 /* check if array is null on the connected component */ 1662 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1663 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1664 if (real_value > 0.0) { /* keep indices and values */ 1665 temp_constraints++; 1666 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1667 total_counts++; 1668 } 1669 } 1670 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1671 valid_constraints = temp_constraints; 1672 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1673 if (!pcbddc->use_nnsp_true && temp_constraints) { 1674 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1675 1676 #if defined(PETSC_MISSING_LAPACK_GESVD) 1677 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1678 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1679 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1680 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1681 from that computed using LAPACKgesvd 1682 -> This is due to a different computation of eigenvectors in LAPACKheev 1683 -> The quality of the POD-computed basis will be the same */ 1684 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1685 /* Store upper triangular part of correlation matrix */ 1686 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1687 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1688 for (j=0;j<temp_constraints;j++) { 1689 for (k=0;k<j+1;k++) { 1690 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1691 } 1692 } 1693 /* compute eigenvalues and eigenvectors of correlation matrix */ 1694 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1695 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1696 #if !defined(PETSC_USE_COMPLEX) 1697 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1698 #else 1699 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1700 #endif 1701 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1702 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1703 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1704 j=0; 1705 while (j < temp_constraints && singular_vals[j] < tol) j++; 1706 total_counts=total_counts-j; 1707 valid_constraints = temp_constraints-j; 1708 /* scale and copy POD basis into used quadrature memory */ 1709 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1710 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1711 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1712 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1713 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1714 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1715 if (j<temp_constraints) { 1716 PetscInt ii; 1717 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1718 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1719 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1720 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1721 for (k=0;k<temp_constraints-j;k++) { 1722 for (ii=0;ii<size_of_constraint;ii++) { 1723 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1724 } 1725 } 1726 } 1727 #else /* on missing GESVD */ 1728 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1729 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1731 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1732 #if !defined(PETSC_USE_COMPLEX) 1733 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1734 #else 1735 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1736 #endif 1737 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1738 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1739 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1740 k = temp_constraints; 1741 if (k > size_of_constraint) k = size_of_constraint; 1742 j = 0; 1743 while (j < k && singular_vals[k-j-1] < tol) j++; 1744 total_counts = total_counts-temp_constraints+k-j; 1745 valid_constraints = k-j; 1746 #endif /* on missing GESVD */ 1747 } 1748 /* setting change_of_basis flag is safe now */ 1749 if (boolforchange) { 1750 for (j=0;j<valid_constraints;j++) { 1751 PetscBTSet(change_basis,total_counts-j-1); 1752 } 1753 } 1754 } 1755 /* free index sets of faces, edges and vertices */ 1756 for (i=0;i<n_ISForFaces;i++) { 1757 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1758 } 1759 if (n_ISForFaces) { 1760 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1761 } 1762 for (i=0;i<n_ISForEdges;i++) { 1763 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1764 } 1765 if (n_ISForEdges) { 1766 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1767 } 1768 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1769 /* map temp_indices_to_constraint in boundary numbering */ 1770 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1771 if (i != temp_indices[total_counts]) { 1772 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1773 } 1774 1775 /* free workspace */ 1776 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1777 ierr = PetscFree(work);CHKERRQ(ierr); 1778 #if defined(PETSC_USE_COMPLEX) 1779 ierr = PetscFree(rwork);CHKERRQ(ierr); 1780 #endif 1781 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1782 #if defined(PETSC_MISSING_LAPACK_GESVD) 1783 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1784 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1785 #endif 1786 } 1787 for (k=0;k<nnsp_size;k++) { 1788 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1789 } 1790 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1791 1792 /* set quantities in pcbddc data structure and store previous primal size */ 1793 /* n_vertices defines the number of subdomain corners in the primal space */ 1794 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1795 olocal_primal_size = pcbddc->local_primal_size; 1796 pcbddc->local_primal_size = total_counts; 1797 pcbddc->n_vertices = n_vertices; 1798 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1799 1800 /* Create constraint matrix */ 1801 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1802 /* so we need to set it up properly either with or without change of basis */ 1803 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1804 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1805 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1806 /* array to compute a local numbering of constraints : vertices first then constraints */ 1807 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1808 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1809 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1810 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1811 /* auxiliary stuff for basis change */ 1812 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1813 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1814 1815 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1816 total_primal_vertices=0; 1817 for (i=0;i<pcbddc->local_primal_size;i++) { 1818 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1819 if (size_of_constraint == 1) { 1820 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1821 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1822 aux_primal_minloc[total_primal_vertices]=0; 1823 total_primal_vertices++; 1824 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1825 PetscInt min_loc,min_index; 1826 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1827 /* find first untouched local node */ 1828 k = 0; 1829 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1830 min_index = global_indices[k]; 1831 min_loc = k; 1832 /* search the minimum among global nodes already untouched on the cc */ 1833 for (k=1;k<size_of_constraint;k++) { 1834 /* there can be more than one constraint on a single connected component */ 1835 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1836 min_index = global_indices[k]; 1837 min_loc = k; 1838 } 1839 } 1840 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1841 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1842 aux_primal_minloc[total_primal_vertices]=min_loc; 1843 total_primal_vertices++; 1844 } 1845 } 1846 /* determine if a QR strategy is needed for change of basis */ 1847 qr_needed = PETSC_FALSE; 1848 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1849 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1850 if (PetscBTLookup(change_basis,i)) { 1851 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1852 j = 0; 1853 for (k=0;k<size_of_constraint;k++) { 1854 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1855 j++; 1856 } 1857 } 1858 /* found more than one primal dof on the cc */ 1859 if (j > 1) { 1860 PetscBTSet(qr_needed_idx,i); 1861 qr_needed = PETSC_TRUE; 1862 } 1863 } 1864 } 1865 /* free workspace */ 1866 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1867 1868 /* permute indices in order to have a sorted set of vertices */ 1869 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1870 1871 /* nonzero structure of constraint matrix */ 1872 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1873 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1874 j=total_primal_vertices; 1875 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1876 if (!PetscBTLookup(change_basis,i)) { 1877 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1878 j++; 1879 } 1880 } 1881 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1882 ierr = PetscFree(nnz);CHKERRQ(ierr); 1883 /* set values in constraint matrix */ 1884 for (i=0;i<total_primal_vertices;i++) { 1885 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1886 } 1887 total_counts = total_primal_vertices; 1888 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1889 if (!PetscBTLookup(change_basis,i)) { 1890 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1891 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1892 total_counts++; 1893 } 1894 } 1895 /* assembling */ 1896 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1897 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1898 /* 1899 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1900 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1901 */ 1902 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1903 if (pcbddc->use_change_of_basis) { 1904 /* dual and primal dofs on a single cc */ 1905 PetscInt dual_dofs,primal_dofs; 1906 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1907 PetscInt primal_counter; 1908 /* working stuff for GEQRF */ 1909 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1910 PetscBLASInt lqr_work; 1911 /* working stuff for UNGQR */ 1912 PetscScalar *gqr_work,lgqr_work_t; 1913 PetscBLASInt lgqr_work; 1914 /* working stuff for TRTRS */ 1915 PetscScalar *trs_rhs; 1916 PetscBLASInt Blas_NRHS; 1917 /* pointers for values insertion into change of basis matrix */ 1918 PetscInt *start_rows,*start_cols; 1919 PetscScalar *start_vals; 1920 /* working stuff for values insertion */ 1921 PetscBT is_primal; 1922 1923 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1924 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1925 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1926 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1927 /* work arrays */ 1928 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1929 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1930 /* nonzeros per row */ 1931 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1932 if (PetscBTLookup(change_basis,i)) { 1933 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1934 if (PetscBTLookup(qr_needed_idx,i)) { 1935 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1936 } else { 1937 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1938 /* get local primal index on the cc */ 1939 j = 0; 1940 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1941 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1942 } 1943 } 1944 } 1945 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1946 ierr = PetscFree(nnz);CHKERRQ(ierr); 1947 /* Set initial identity in the matrix */ 1948 for (i=0;i<pcis->n_B;i++) { 1949 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1950 } 1951 1952 if (pcbddc->dbg_flag) { 1953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1955 } 1956 1957 1958 /* Now we loop on the constraints which need a change of basis */ 1959 /* 1960 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1961 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1962 1963 Basic blocks of change of basis matrix T computed 1964 1965 - Using the following block transformation if there is only a primal dof on the cc 1966 (in the example, primal dof is the last one of the edge in LOCAL ordering 1967 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1968 | 1 0 ... 0 1 | 1969 | 0 1 ... 0 1 | 1970 | ... | 1971 | 0 ... 1 1 | 1972 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1973 1974 - via QR decomposition of constraints otherwise 1975 */ 1976 if (qr_needed) { 1977 /* space to store Q */ 1978 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 1979 /* first we issue queries for optimal work */ 1980 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1981 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1982 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1983 lqr_work = -1; 1984 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1985 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1986 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1987 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 1988 lgqr_work = -1; 1989 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1990 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1991 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1992 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1993 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1994 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1995 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1996 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1997 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 1998 /* array to store scaling factors for reflectors */ 1999 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2000 /* array to store rhs and solution of triangular solver */ 2001 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2002 /* allocating workspace for check */ 2003 if (pcbddc->dbg_flag) { 2004 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2005 } 2006 } 2007 /* array to store whether a node is primal or not */ 2008 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2009 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2010 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2011 if (i != total_primal_vertices) { 2012 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2013 } 2014 for (i=0;i<total_primal_vertices;i++) { 2015 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2016 } 2017 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2018 2019 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2020 /* -> using implicit ordering contained in temp_indices data */ 2021 total_counts = pcbddc->n_vertices; 2022 primal_counter = total_counts; 2023 while (total_counts<pcbddc->local_primal_size) { 2024 primal_dofs = 1; 2025 if (PetscBTLookup(change_basis,total_counts)) { 2026 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2027 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2028 primal_dofs++; 2029 } 2030 /* get constraint info */ 2031 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2032 dual_dofs = size_of_constraint-primal_dofs; 2033 2034 if (pcbddc->dbg_flag) { 2035 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2036 } 2037 2038 if (primal_dofs > 1) { /* QR */ 2039 2040 /* copy quadrature constraints for change of basis check */ 2041 if (pcbddc->dbg_flag) { 2042 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2043 } 2044 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2045 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2046 2047 /* compute QR decomposition of constraints */ 2048 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2049 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2050 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2051 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2052 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2053 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2054 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2055 2056 /* explictly compute R^-T */ 2057 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2058 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2059 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2060 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2061 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2062 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2063 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2064 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2065 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2066 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2067 2068 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2069 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2070 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2071 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2072 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2073 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2074 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2075 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2076 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2077 2078 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2079 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2080 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2081 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2082 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2083 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2084 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2085 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2087 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2088 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2089 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2090 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2091 2092 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2093 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2094 /* insert cols for primal dofs */ 2095 for (j=0;j<primal_dofs;j++) { 2096 start_vals = &qr_basis[j*size_of_constraint]; 2097 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2098 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2099 } 2100 /* insert cols for dual dofs */ 2101 for (j=0,k=0;j<dual_dofs;k++) { 2102 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2103 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2104 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2105 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2106 j++; 2107 } 2108 } 2109 2110 /* check change of basis */ 2111 if (pcbddc->dbg_flag) { 2112 PetscInt ii,jj; 2113 PetscBool valid_qr=PETSC_TRUE; 2114 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2115 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2116 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2117 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2118 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2119 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2120 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2121 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2122 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2123 for (jj=0;jj<size_of_constraint;jj++) { 2124 for (ii=0;ii<primal_dofs;ii++) { 2125 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2126 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2127 } 2128 } 2129 if (!valid_qr) { 2130 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2131 for (jj=0;jj<size_of_constraint;jj++) { 2132 for (ii=0;ii<primal_dofs;ii++) { 2133 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2134 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2135 } 2136 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2137 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2138 } 2139 } 2140 } 2141 } else { 2142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2143 } 2144 } 2145 } else { /* simple transformation block */ 2146 PetscInt row,col; 2147 PetscScalar val; 2148 for (j=0;j<size_of_constraint;j++) { 2149 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2150 if (!PetscBTLookup(is_primal,row)) { 2151 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2152 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2153 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2154 } else { 2155 for (k=0;k<size_of_constraint;k++) { 2156 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2157 if (row != col) { 2158 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2159 } else { 2160 val = 1.0; 2161 } 2162 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2163 } 2164 } 2165 } 2166 if (pcbddc->dbg_flag) { 2167 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2168 } 2169 } 2170 /* increment primal counter */ 2171 primal_counter += primal_dofs; 2172 } else { 2173 if (pcbddc->dbg_flag) { 2174 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2175 } 2176 } 2177 /* increment constraint counter total_counts */ 2178 total_counts += primal_dofs; 2179 } 2180 2181 /* free workspace */ 2182 if (qr_needed) { 2183 if (pcbddc->dbg_flag) { 2184 ierr = PetscFree(work);CHKERRQ(ierr); 2185 } 2186 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2187 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2188 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2189 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2190 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2191 } 2192 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2193 /* assembling */ 2194 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2195 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2196 /* 2197 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2198 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2199 */ 2200 } 2201 /* Change of basis as provided by the user in local numbering (internal and boundary) or boundary only */ 2202 if (pcbddc->user_ChangeOfBasisMatrix) { 2203 PetscInt rows,cols; 2204 ierr = MatGetSize(pcbddc->user_ChangeOfBasisMatrix,&rows,&cols);CHKERRQ(ierr); 2205 if (rows == pcis->n && cols == pcis->n) { 2206 ierr = MatGetSubMatrix(pcbddc->user_ChangeOfBasisMatrix,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2207 } else { 2208 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2209 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2210 } 2211 } 2212 2213 /* get indices in local ordering for vertices and constraints */ 2214 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2215 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2216 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2217 } 2218 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2219 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2220 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2221 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2222 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2223 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2224 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2225 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2226 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2227 /* set quantities in PCBDDC data struct */ 2228 pcbddc->n_actual_vertices = i; 2229 /* check if a new primal space has been introduced */ 2230 pcbddc->new_primal_space_local = PETSC_TRUE; 2231 if (olocal_primal_size == pcbddc->local_primal_size) { 2232 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2233 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2234 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2235 } 2236 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2237 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2238 2239 /* flush dbg viewer */ 2240 if (pcbddc->dbg_flag) { 2241 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2242 } 2243 2244 /* free workspace */ 2245 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2246 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2247 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2248 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2249 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2250 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2251 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2252 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2258 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2259 { 2260 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2261 PC_IS *pcis = (PC_IS*)pc->data; 2262 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2263 PetscInt ierr,i,vertex_size; 2264 PetscViewer viewer=pcbddc->dbg_viewer; 2265 2266 PetscFunctionBegin; 2267 /* Reset previously computed graph */ 2268 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2269 /* Init local Graph struct */ 2270 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2271 2272 /* Check validity of the csr graph passed in by the user */ 2273 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2274 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2275 } 2276 2277 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2278 if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) { 2279 Mat mat_adj; 2280 const PetscInt *xadj,*adjncy; 2281 PetscBool flg_row=PETSC_TRUE; 2282 2283 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2284 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2285 if (!flg_row) { 2286 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2287 } 2288 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2289 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2290 if (!flg_row) { 2291 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2292 } 2293 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2294 } 2295 2296 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2297 vertex_size = 1; 2298 if (pcbddc->user_provided_isfordofs) { 2299 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2300 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2301 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2302 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2303 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2304 } 2305 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2306 pcbddc->n_ISForDofs = 0; 2307 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2308 } 2309 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2310 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2311 } else { 2312 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2313 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2314 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2315 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2316 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2317 } 2318 } 2319 } 2320 2321 /* Setup of Graph */ 2322 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2323 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2324 } 2325 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2326 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2327 } 2328 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2329 2330 /* Graph's connected components analysis */ 2331 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2332 2333 /* print some info to stdout */ 2334 if (pcbddc->dbg_flag) { 2335 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2336 } 2337 2338 /* mark topography has done */ 2339 pcbddc->recompute_topography = PETSC_FALSE; 2340 PetscFunctionReturn(0); 2341 } 2342 2343 #undef __FUNCT__ 2344 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2345 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2346 { 2347 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2348 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2349 PetscErrorCode ierr; 2350 2351 PetscFunctionBegin; 2352 n = 0; 2353 vertices = 0; 2354 if (pcbddc->ConstraintMatrix) { 2355 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2356 for (i=0;i<local_primal_size;i++) { 2357 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2358 if (size_of_constraint == 1) n++; 2359 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2360 } 2361 if (vertices_idx) { 2362 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2363 n = 0; 2364 for (i=0;i<local_primal_size;i++) { 2365 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2366 if (size_of_constraint == 1) { 2367 vertices[n++]=row_cmat_indices[0]; 2368 } 2369 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2370 } 2371 } 2372 } 2373 *n_vertices = n; 2374 if (vertices_idx) *vertices_idx = vertices; 2375 PetscFunctionReturn(0); 2376 } 2377 2378 #undef __FUNCT__ 2379 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2380 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2381 { 2382 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2383 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2384 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2385 PetscBT touched; 2386 PetscErrorCode ierr; 2387 2388 /* This function assumes that the number of local constraints per connected component 2389 is not greater than the number of nodes defined for the connected component 2390 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2391 PetscFunctionBegin; 2392 n = 0; 2393 constraints_index = 0; 2394 if (pcbddc->ConstraintMatrix) { 2395 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2396 max_size_of_constraint = 0; 2397 for (i=0;i<local_primal_size;i++) { 2398 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2399 if (size_of_constraint > 1) { 2400 n++; 2401 } 2402 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2403 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2404 } 2405 if (constraints_idx) { 2406 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2407 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2408 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2409 n = 0; 2410 for (i=0;i<local_primal_size;i++) { 2411 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2412 if (size_of_constraint > 1) { 2413 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2414 /* find first untouched local node */ 2415 j = 0; 2416 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2417 min_index = row_cmat_global_indices[j]; 2418 min_loc = j; 2419 /* search the minimum among nodes not yet touched on the connected component 2420 since there can be more than one constraint on a single cc */ 2421 for (j=1;j<size_of_constraint;j++) { 2422 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2423 min_index = row_cmat_global_indices[j]; 2424 min_loc = j; 2425 } 2426 } 2427 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2428 constraints_index[n++] = row_cmat_indices[min_loc]; 2429 } 2430 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2431 } 2432 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2433 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2434 } 2435 } 2436 *n_constraints = n; 2437 if (constraints_idx) *constraints_idx = constraints_index; 2438 PetscFunctionReturn(0); 2439 } 2440 2441 /* the next two functions has been adapted from pcis.c */ 2442 #undef __FUNCT__ 2443 #define __FUNCT__ "PCBDDCApplySchur" 2444 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2445 { 2446 PetscErrorCode ierr; 2447 PC_IS *pcis = (PC_IS*)(pc->data); 2448 2449 PetscFunctionBegin; 2450 if (!vec2_B) { vec2_B = v; } 2451 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2452 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2453 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2454 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2455 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2456 PetscFunctionReturn(0); 2457 } 2458 2459 #undef __FUNCT__ 2460 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2461 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2462 { 2463 PetscErrorCode ierr; 2464 PC_IS *pcis = (PC_IS*)(pc->data); 2465 2466 PetscFunctionBegin; 2467 if (!vec2_B) { vec2_B = v; } 2468 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2469 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2470 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2471 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2472 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2473 PetscFunctionReturn(0); 2474 } 2475 2476 #undef __FUNCT__ 2477 #define __FUNCT__ "PCBDDCSubsetNumbering" 2478 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2479 { 2480 Vec local_vec,global_vec; 2481 IS seqis,paris; 2482 VecScatter scatter_ctx; 2483 PetscScalar *array; 2484 PetscInt *temp_global_dofs; 2485 PetscScalar globalsum; 2486 PetscInt i,j,s; 2487 PetscInt nlocals,first_index,old_index,max_local; 2488 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2489 PetscMPIInt *dof_sizes,*dof_displs; 2490 PetscBool first_found; 2491 PetscErrorCode ierr; 2492 2493 PetscFunctionBegin; 2494 /* mpi buffers */ 2495 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2496 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2497 j = ( !rank_prec_comm ? size_prec_comm : 0); 2498 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2499 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2500 /* get maximum size of subset */ 2501 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2502 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2503 max_local = 0; 2504 for (i=0;i<n_local_dofs;i++) { 2505 if (max_local < temp_global_dofs[i] ) { 2506 max_local = temp_global_dofs[i]; 2507 } 2508 } 2509 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2510 max_global++; 2511 max_local = 0; 2512 for (i=0;i<n_local_dofs;i++) { 2513 if (max_local < local_dofs[i] ) { 2514 max_local = local_dofs[i]; 2515 } 2516 } 2517 max_local++; 2518 /* allocate workspace */ 2519 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2520 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2521 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2522 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2523 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2524 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2525 /* create scatter */ 2526 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2527 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2528 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2529 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2530 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2531 /* init array */ 2532 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2533 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2534 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2535 if (local_dofs_mult) { 2536 for (i=0;i<n_local_dofs;i++) { 2537 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2538 } 2539 } else { 2540 for (i=0;i<n_local_dofs;i++) { 2541 array[local_dofs[i]]=1.0; 2542 } 2543 } 2544 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2545 /* scatter into global vec and get total number of global dofs */ 2546 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2547 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2548 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2549 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2550 /* Fill global_vec with cumulative function for global numbering */ 2551 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2552 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2553 nlocals = 0; 2554 first_index = -1; 2555 first_found = PETSC_FALSE; 2556 for (i=0;i<s;i++) { 2557 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2558 first_found = PETSC_TRUE; 2559 first_index = i; 2560 } 2561 nlocals += (PetscInt)PetscRealPart(array[i]); 2562 } 2563 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2564 if (!rank_prec_comm) { 2565 dof_displs[0]=0; 2566 for (i=1;i<size_prec_comm;i++) { 2567 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2568 } 2569 } 2570 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2571 if (first_found) { 2572 array[first_index] += (PetscScalar)nlocals; 2573 old_index = first_index; 2574 for (i=first_index+1;i<s;i++) { 2575 if (PetscRealPart(array[i]) > 0.1) { 2576 array[i] += array[old_index]; 2577 old_index = i; 2578 } 2579 } 2580 } 2581 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2582 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2583 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2584 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2585 /* get global ordering of local dofs */ 2586 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2587 if (local_dofs_mult) { 2588 for (i=0;i<n_local_dofs;i++) { 2589 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2590 } 2591 } else { 2592 for (i=0;i<n_local_dofs;i++) { 2593 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2594 } 2595 } 2596 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2597 /* free workspace */ 2598 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2599 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2600 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2601 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2602 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2603 /* return pointer to global ordering of local dofs */ 2604 *global_numbering_subset = temp_global_dofs; 2605 PetscFunctionReturn(0); 2606 } 2607 2608 #undef __FUNCT__ 2609 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2610 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2611 { 2612 PetscInt i,j; 2613 PetscScalar *alphas; 2614 PetscErrorCode ierr; 2615 2616 PetscFunctionBegin; 2617 /* this implements stabilized Gram-Schmidt */ 2618 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2619 for (i=0;i<n;i++) { 2620 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2621 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2622 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2623 } 2624 ierr = PetscFree(alphas);CHKERRQ(ierr); 2625 PetscFunctionReturn(0); 2626 } 2627 2628 #undef __FUNCT__ 2629 #define __FUNCT__ "MatISGetSubassemblingPattern" 2630 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2631 { 2632 Mat subdomain_adj; 2633 IS new_ranks,ranks_send_to; 2634 MatPartitioning partitioner; 2635 Mat_IS *matis; 2636 PetscInt n_neighs,*neighs,*n_shared,**shared; 2637 PetscInt prank; 2638 PetscMPIInt size,rank,color; 2639 PetscInt *xadj,*adjncy,*oldranks; 2640 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2641 PetscInt i,j,local_size,threshold=0; 2642 PetscErrorCode ierr; 2643 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2644 PetscSubcomm subcomm; 2645 2646 PetscFunctionBegin; 2647 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2648 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2649 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2650 2651 /* Get info on mapping */ 2652 matis = (Mat_IS*)(mat->data); 2653 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2654 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2655 2656 /* build local CSR graph of subdomains' connectivity */ 2657 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2658 xadj[0] = 0; 2659 xadj[1] = PetscMax(n_neighs-1,0); 2660 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2661 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2662 2663 if (threshold) { 2664 PetscInt* count,min_threshold; 2665 ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr); 2666 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2667 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2668 for (j=0;j<n_shared[i];j++) { 2669 count[shared[i][j]] += 1; 2670 } 2671 } 2672 /* adapt threshold since we dont want to lose significant connections */ 2673 min_threshold = n_neighs; 2674 for (i=1;i<n_neighs;i++) { 2675 for (j=0;j<n_shared[i];j++) { 2676 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2677 } 2678 } 2679 threshold = PetscMax(min_threshold+1,threshold); 2680 xadj[1] = 0; 2681 for (i=1;i<n_neighs;i++) { 2682 for (j=0;j<n_shared[i];j++) { 2683 if (count[shared[i][j]] < threshold) { 2684 adjncy[xadj[1]] = neighs[i]; 2685 adjncy_wgt[xadj[1]] = n_shared[i]; 2686 xadj[1]++; 2687 break; 2688 } 2689 } 2690 } 2691 ierr = PetscFree(count);CHKERRQ(ierr); 2692 } else { 2693 if (xadj[1]) { 2694 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2695 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2696 } 2697 } 2698 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2699 if (use_square) { 2700 for (i=0;i<xadj[1];i++) { 2701 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2702 } 2703 } 2704 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2705 2706 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2707 2708 /* 2709 Restrict work on active processes only. 2710 */ 2711 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2712 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2713 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2714 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2715 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2716 if (color) { 2717 ierr = PetscFree(xadj);CHKERRQ(ierr); 2718 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2719 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2720 } else { 2721 PetscInt coarsening_ratio; 2722 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2723 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2724 prank = rank; 2725 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2726 /* 2727 for (i=0;i<size;i++) { 2728 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2729 } 2730 */ 2731 for (i=0;i<xadj[1];i++) { 2732 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2733 } 2734 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2735 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2736 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2737 2738 /* Partition */ 2739 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2740 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2741 if (use_vwgt) { 2742 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2743 v_wgt[0] = local_size; 2744 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2745 } 2746 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2747 coarsening_ratio = size/n_subdomains; 2748 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2749 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2750 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2751 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2752 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2753 2754 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2755 if (contiguous) { 2756 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2757 } else { 2758 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2759 } 2760 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2761 /* clean up */ 2762 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2763 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2764 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2765 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2766 } 2767 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2768 2769 /* assemble parallel IS for sends */ 2770 i = 1; 2771 if (color) i=0; 2772 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2773 2774 /* get back IS */ 2775 *is_sends = ranks_send_to; 2776 PetscFunctionReturn(0); 2777 } 2778 2779 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2780 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "MatISSubassemble" 2783 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2784 { 2785 Mat local_mat; 2786 Mat_IS *matis; 2787 IS is_sends_internal; 2788 PetscInt rows,cols; 2789 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2790 PetscBool ismatis,isdense,destroy_mat; 2791 ISLocalToGlobalMapping l2gmap; 2792 PetscInt* l2gmap_indices; 2793 const PetscInt* is_indices; 2794 MatType new_local_type; 2795 /* buffers */ 2796 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2797 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2798 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2799 /* MPI */ 2800 MPI_Comm comm,comm_n; 2801 PetscSubcomm subcomm; 2802 PetscMPIInt n_sends,n_recvs,commsize; 2803 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2804 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2805 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2806 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2807 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2808 PetscErrorCode ierr; 2809 2810 PetscFunctionBegin; 2811 /* TODO: add missing checks */ 2812 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2813 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2814 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2815 PetscValidLogicalCollectiveInt(mat,nis,7); 2816 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2817 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2818 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2819 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2820 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2821 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2822 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2823 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2824 PetscInt mrows,mcols,mnrows,mncols; 2825 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2826 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2827 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2828 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2829 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2830 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2831 } 2832 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2833 PetscValidLogicalCollectiveInt(mat,bs,0); 2834 /* prepare IS for sending if not provided */ 2835 if (!is_sends) { 2836 PetscBool pcontig = PETSC_TRUE; 2837 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 2838 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 2839 } else { 2840 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2841 is_sends_internal = is_sends; 2842 } 2843 2844 /* get pointer of MATIS data */ 2845 matis = (Mat_IS*)mat->data; 2846 2847 /* get comm */ 2848 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2849 2850 /* compute number of sends */ 2851 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2852 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2853 2854 /* compute number of receives */ 2855 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2856 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 2857 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2858 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2859 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2860 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2861 ierr = PetscFree(iflags);CHKERRQ(ierr); 2862 2863 /* restrict comm if requested */ 2864 subcomm = 0; 2865 destroy_mat = PETSC_FALSE; 2866 if (restrict_comm) { 2867 PetscMPIInt color,rank,subcommsize; 2868 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2869 color = 0; 2870 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 2871 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2872 subcommsize = commsize - subcommsize; 2873 /* check if reuse has been requested */ 2874 if (reuse == MAT_REUSE_MATRIX) { 2875 if (*mat_n) { 2876 PetscMPIInt subcommsize2; 2877 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 2878 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 2879 comm_n = PetscObjectComm((PetscObject)*mat_n); 2880 } else { 2881 comm_n = PETSC_COMM_SELF; 2882 } 2883 } else { /* MAT_INITIAL_MATRIX */ 2884 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 2885 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 2886 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2887 comm_n = subcomm->comm; 2888 } 2889 /* flag to destroy *mat_n if not significative */ 2890 if (color) destroy_mat = PETSC_TRUE; 2891 } else { 2892 comm_n = comm; 2893 } 2894 2895 /* prepare send/receive buffers */ 2896 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 2897 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2898 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 2899 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2900 if (nis) { 2901 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 2902 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 2903 } 2904 2905 /* Get data from local matrices */ 2906 if (!isdense) { 2907 /* TODO: See below some guidelines on how to prepare the local buffers */ 2908 /* 2909 send_buffer_vals should contain the raw values of the local matrix 2910 send_buffer_idxs should contain: 2911 - MatType_PRIVATE type 2912 - PetscInt size_of_l2gmap 2913 - PetscInt global_row_indices[size_of_l2gmap] 2914 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2915 */ 2916 } else { 2917 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2918 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2919 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 2920 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2921 send_buffer_idxs[1] = i; 2922 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2923 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2924 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2925 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2926 for (i=0;i<n_sends;i++) { 2927 ilengths_vals[is_indices[i]] = len*len; 2928 ilengths_idxs[is_indices[i]] = len+2; 2929 } 2930 } 2931 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2932 /* additional is (if any) */ 2933 if (nis) { 2934 PetscMPIInt psum; 2935 PetscInt j; 2936 for (j=0,psum=0;j<nis;j++) { 2937 PetscInt plen; 2938 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 2939 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 2940 psum += len+1; /* indices + lenght */ 2941 } 2942 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 2943 for (j=0,psum=0;j<nis;j++) { 2944 PetscInt plen; 2945 const PetscInt *is_array_idxs; 2946 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 2947 send_buffer_idxs_is[psum] = plen; 2948 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 2949 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 2950 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 2951 psum += plen+1; /* indices + lenght */ 2952 } 2953 for (i=0;i<n_sends;i++) { 2954 ilengths_idxs_is[is_indices[i]] = psum; 2955 } 2956 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 2957 } 2958 2959 buf_size_idxs = 0; 2960 buf_size_vals = 0; 2961 buf_size_idxs_is = 0; 2962 for (i=0;i<n_recvs;i++) { 2963 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2964 buf_size_vals += (PetscInt)olengths_vals[i]; 2965 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 2966 } 2967 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 2968 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 2969 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 2970 2971 /* get new tags for clean communications */ 2972 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2973 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2974 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 2975 2976 /* allocate for requests */ 2977 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 2978 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 2979 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 2980 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 2981 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 2982 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 2983 2984 /* communications */ 2985 ptr_idxs = recv_buffer_idxs; 2986 ptr_vals = recv_buffer_vals; 2987 ptr_idxs_is = recv_buffer_idxs_is; 2988 for (i=0;i<n_recvs;i++) { 2989 source_dest = onodes[i]; 2990 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2991 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2992 ptr_idxs += olengths_idxs[i]; 2993 ptr_vals += olengths_vals[i]; 2994 if (nis) { 2995 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 2996 ptr_idxs_is += olengths_idxs_is[i]; 2997 } 2998 } 2999 for (i=0;i<n_sends;i++) { 3000 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3001 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3002 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3003 if (nis) { 3004 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3005 } 3006 } 3007 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3008 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3009 3010 /* assemble new l2g map */ 3011 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3012 ptr_idxs = recv_buffer_idxs; 3013 buf_size_idxs = 0; 3014 for (i=0;i<n_recvs;i++) { 3015 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3016 ptr_idxs += olengths_idxs[i]; 3017 } 3018 ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr); 3019 ptr_idxs = recv_buffer_idxs; 3020 buf_size_idxs = 0; 3021 for (i=0;i<n_recvs;i++) { 3022 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3023 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3024 ptr_idxs += olengths_idxs[i]; 3025 } 3026 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3027 ierr = ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3028 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3029 3030 /* infer new local matrix type from received local matrices type */ 3031 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3032 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3033 if (n_recvs) { 3034 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3035 ptr_idxs = recv_buffer_idxs; 3036 for (i=0;i<n_recvs;i++) { 3037 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3038 new_local_type_private = MATAIJ_PRIVATE; 3039 break; 3040 } 3041 ptr_idxs += olengths_idxs[i]; 3042 } 3043 switch (new_local_type_private) { 3044 case MATDENSE_PRIVATE: 3045 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3046 new_local_type = MATSEQAIJ; 3047 bs = 1; 3048 } else { /* if I receive only 1 dense matrix */ 3049 new_local_type = MATSEQDENSE; 3050 bs = 1; 3051 } 3052 break; 3053 case MATAIJ_PRIVATE: 3054 new_local_type = MATSEQAIJ; 3055 bs = 1; 3056 break; 3057 case MATBAIJ_PRIVATE: 3058 new_local_type = MATSEQBAIJ; 3059 break; 3060 case MATSBAIJ_PRIVATE: 3061 new_local_type = MATSEQSBAIJ; 3062 break; 3063 default: 3064 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3065 break; 3066 } 3067 } else { /* by default, new_local_type is seqdense */ 3068 new_local_type = MATSEQDENSE; 3069 bs = 1; 3070 } 3071 3072 /* create MATIS object if needed */ 3073 if (reuse == MAT_INITIAL_MATRIX) { 3074 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3075 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3076 } else { 3077 /* it also destroys the local matrices */ 3078 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3079 } 3080 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3081 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3082 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3083 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3084 3085 /* set values */ 3086 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3087 ptr_vals = recv_buffer_vals; 3088 ptr_idxs = recv_buffer_idxs; 3089 for (i=0;i<n_recvs;i++) { 3090 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3091 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3092 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3093 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3094 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3095 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3096 } else { 3097 /* TODO */ 3098 } 3099 ptr_idxs += olengths_idxs[i]; 3100 ptr_vals += olengths_vals[i]; 3101 } 3102 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3103 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3104 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3105 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3106 3107 #if 0 3108 if (!restrict_comm) { /* check */ 3109 Vec lvec,rvec; 3110 PetscReal infty_error; 3111 3112 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3113 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3114 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3115 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3116 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3117 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3118 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3119 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3120 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3121 } 3122 #endif 3123 3124 /* assemble new additional is (if any) */ 3125 if (nis) { 3126 PetscInt **temp_idxs,*count_is,j,psum; 3127 3128 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3129 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3130 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3131 ptr_idxs = recv_buffer_idxs_is; 3132 psum = 0; 3133 for (i=0;i<n_recvs;i++) { 3134 for (j=0;j<nis;j++) { 3135 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3136 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3137 psum += plen; 3138 ptr_idxs += plen+1; /* shift pointer to received data */ 3139 } 3140 } 3141 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3142 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3143 for (i=1;i<nis;i++) { 3144 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3145 } 3146 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3147 ptr_idxs = recv_buffer_idxs_is; 3148 for (i=0;i<n_recvs;i++) { 3149 for (j=0;j<nis;j++) { 3150 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3151 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3152 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3153 ptr_idxs += plen+1; /* shift pointer to received data */ 3154 } 3155 } 3156 for (i=0;i<nis;i++) { 3157 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3158 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3159 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3160 } 3161 ierr = PetscFree(count_is);CHKERRQ(ierr); 3162 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3163 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3164 } 3165 /* free workspace */ 3166 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3167 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3168 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3169 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3170 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3171 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3172 if (isdense) { 3173 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3174 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3175 } else { 3176 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3177 } 3178 if (nis) { 3179 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3180 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3181 } 3182 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3183 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3184 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3185 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3186 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3187 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3188 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3189 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3190 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3191 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3192 ierr = PetscFree(onodes);CHKERRQ(ierr); 3193 if (nis) { 3194 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3195 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3196 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3197 } 3198 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3199 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3200 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3201 for (i=0;i<nis;i++) { 3202 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3203 } 3204 } 3205 PetscFunctionReturn(0); 3206 } 3207 3208 /* temporary hack into ksp private data structure */ 3209 #include <petsc-private/kspimpl.h> 3210 3211 #undef __FUNCT__ 3212 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3213 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3214 { 3215 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3216 PC_IS *pcis = (PC_IS*)pc->data; 3217 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3218 MatNullSpace CoarseNullSpace=NULL; 3219 ISLocalToGlobalMapping coarse_islg; 3220 IS coarse_is,*isarray; 3221 PetscInt i,im_active=-1,active_procs=-1; 3222 PetscInt nis,nisdofs,nisneu; 3223 PC pc_temp; 3224 PCType coarse_pc_type; 3225 KSPType coarse_ksp_type; 3226 PetscBool multilevel_requested,multilevel_allowed; 3227 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3228 Mat t_coarse_mat_is; 3229 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3230 PetscMPIInt all_procs; 3231 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3232 PetscBool compute_vecs = PETSC_FALSE; 3233 PetscErrorCode ierr; 3234 3235 PetscFunctionBegin; 3236 /* Assign global numbering to coarse dofs */ 3237 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3238 compute_vecs = PETSC_TRUE; 3239 PetscInt ocoarse_size; 3240 ocoarse_size = pcbddc->coarse_size; 3241 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3242 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3243 /* see if we can avoid some work */ 3244 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3245 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3246 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3247 coarse_reuse = PETSC_FALSE; 3248 } else { /* we can safely reuse already computed coarse matrix */ 3249 coarse_reuse = PETSC_TRUE; 3250 } 3251 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3252 coarse_reuse = PETSC_FALSE; 3253 } 3254 /* reset any subassembling information */ 3255 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3256 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3257 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3258 coarse_reuse = PETSC_TRUE; 3259 } 3260 3261 /* count "active" (i.e. with positive local size) and "void" processes */ 3262 im_active = !!(pcis->n); 3263 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3264 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3265 void_procs = all_procs-active_procs; 3266 csin_type_simple = PETSC_TRUE; 3267 if (pcbddc->current_level) { 3268 csin_ml = PETSC_TRUE; 3269 ncoarse_ml = void_procs; 3270 csin_ds = PETSC_TRUE; 3271 ncoarse_ds = void_procs; 3272 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3273 } else { 3274 csin_ml = PETSC_FALSE; 3275 ncoarse_ml = all_procs; 3276 if (void_procs) { 3277 csin_ds = PETSC_TRUE; 3278 ncoarse_ds = void_procs; 3279 csin_type_simple = PETSC_FALSE; 3280 } else { 3281 csin_ds = PETSC_FALSE; 3282 ncoarse_ds = all_procs; 3283 } 3284 } 3285 3286 /* 3287 test if we can go multilevel: three conditions must be satisfied: 3288 - we have not exceeded the number of levels requested 3289 - we can actually subassemble the active processes 3290 - we can find a suitable number of MPI processes where we can place the subassembled problem 3291 */ 3292 multilevel_allowed = PETSC_FALSE; 3293 multilevel_requested = PETSC_FALSE; 3294 if (pcbddc->current_level < pcbddc->max_levels) { 3295 multilevel_requested = PETSC_TRUE; 3296 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3297 multilevel_allowed = PETSC_FALSE; 3298 } else { 3299 multilevel_allowed = PETSC_TRUE; 3300 } 3301 } 3302 /* determine number of process partecipating to coarse solver */ 3303 if (multilevel_allowed) { 3304 ncoarse = ncoarse_ml; 3305 csin = csin_ml; 3306 } else { 3307 ncoarse = ncoarse_ds; 3308 csin = csin_ds; 3309 } 3310 3311 /* creates temporary l2gmap and IS for coarse indexes */ 3312 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3313 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3314 3315 /* creates temporary MATIS object for coarse matrix */ 3316 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3317 #if 0 3318 { 3319 PetscViewer viewer; 3320 char filename[256]; 3321 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3322 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3323 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3324 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3325 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3326 } 3327 #endif 3328 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3329 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3330 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3331 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3332 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3333 3334 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3335 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3336 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3337 const PetscInt *idxs; 3338 ISLocalToGlobalMapping tmap; 3339 3340 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3341 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3342 /* allocate space for temporary storage */ 3343 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3344 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3345 /* allocate for IS array */ 3346 nisdofs = pcbddc->n_ISForDofsLocal; 3347 nisneu = !!pcbddc->NeumannBoundariesLocal; 3348 nis = nisdofs + nisneu; 3349 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3350 /* dofs splitting */ 3351 for (i=0;i<nisdofs;i++) { 3352 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3353 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3354 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3355 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3356 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3357 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3358 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3359 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3360 } 3361 /* neumann boundaries */ 3362 if (pcbddc->NeumannBoundariesLocal) { 3363 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3364 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3365 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3366 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3367 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3368 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3369 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3370 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3371 } 3372 /* free memory */ 3373 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3374 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3375 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3376 } else { 3377 nis = 0; 3378 nisdofs = 0; 3379 nisneu = 0; 3380 isarray = NULL; 3381 } 3382 /* destroy no longer needed map */ 3383 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3384 3385 /* restrict on coarse candidates (if needed) */ 3386 coarse_mat_is = NULL; 3387 if (csin) { 3388 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3389 PetscInt j,tissize,*nisindices; 3390 PetscInt *coarse_candidates; 3391 const PetscInt* tisindices; 3392 /* get coarse candidates' ranks in pc communicator */ 3393 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3394 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3395 for (i=0,j=0;i<all_procs;i++) { 3396 if (!coarse_candidates[i]) { 3397 coarse_candidates[j]=i; 3398 j++; 3399 } 3400 } 3401 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3402 /* get a suitable subassembling pattern */ 3403 if (csin_type_simple) { 3404 PetscMPIInt rank; 3405 PetscInt issize,isidx; 3406 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3407 if (im_active) { 3408 issize = 1; 3409 isidx = (PetscInt)rank; 3410 } else { 3411 issize = 0; 3412 isidx = -1; 3413 } 3414 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3415 } else { 3416 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3417 } 3418 if (pcbddc->dbg_flag) { 3419 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3420 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3421 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3422 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3423 for (i=0;i<j;i++) { 3424 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3425 } 3426 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3427 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3428 } 3429 /* shift the pattern on coarse candidates */ 3430 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3431 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3432 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3433 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3434 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3435 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3436 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3437 } 3438 if (pcbddc->dbg_flag) { 3439 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3440 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3441 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3442 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3443 } 3444 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3445 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3446 } else { 3447 if (pcbddc->dbg_flag) { 3448 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3449 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3450 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3451 } 3452 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3453 coarse_mat_is = t_coarse_mat_is; 3454 } 3455 3456 /* create local to global scatters for coarse problem */ 3457 if (compute_vecs) { 3458 PetscInt lrows; 3459 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3460 if (coarse_mat_is) { 3461 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3462 } else { 3463 lrows = 0; 3464 } 3465 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3466 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3467 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3468 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3469 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3470 } 3471 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3472 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3473 3474 /* set defaults for coarse KSP and PC */ 3475 if (multilevel_allowed) { 3476 coarse_ksp_type = KSPRICHARDSON; 3477 coarse_pc_type = PCBDDC; 3478 } else { 3479 coarse_ksp_type = KSPPREONLY; 3480 coarse_pc_type = PCREDUNDANT; 3481 } 3482 3483 /* print some info if requested */ 3484 if (pcbddc->dbg_flag) { 3485 if (!multilevel_allowed) { 3486 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3487 if (multilevel_requested) { 3488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3489 } else if (pcbddc->max_levels) { 3490 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3491 } 3492 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3493 } 3494 } 3495 3496 /* create the coarse KSP object only once with defaults */ 3497 if (coarse_mat_is) { 3498 MatReuse coarse_mat_reuse; 3499 PetscViewer dbg_viewer = NULL; 3500 if (pcbddc->dbg_flag) { 3501 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3502 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3503 } 3504 if (!pcbddc->coarse_ksp) { 3505 char prefix[256],str_level[16]; 3506 size_t len; 3507 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3508 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3509 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3510 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3511 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3512 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3513 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3514 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3515 /* prefix */ 3516 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3517 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3518 if (!pcbddc->current_level) { 3519 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3520 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3521 } else { 3522 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3523 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3524 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3525 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3526 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3527 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3528 } 3529 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3530 /* allow user customization */ 3531 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3532 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3533 } 3534 3535 /* get some info after set from options */ 3536 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3537 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3538 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3539 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3540 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3541 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3542 isbddc = PETSC_FALSE; 3543 } 3544 if (isredundant) { 3545 KSP inner_ksp; 3546 PC inner_pc; 3547 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3548 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3549 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3550 } 3551 3552 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3553 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3554 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3555 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3556 if (nisdofs) { 3557 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3558 for (i=0;i<nisdofs;i++) { 3559 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3560 } 3561 } 3562 if (nisneu) { 3563 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3564 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3565 } 3566 3567 /* assemble coarse matrix */ 3568 if (coarse_reuse) { 3569 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3570 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3571 coarse_mat_reuse = MAT_REUSE_MATRIX; 3572 } else { 3573 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3574 } 3575 if (isbddc || isnn) { 3576 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3577 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3578 if (pcbddc->dbg_flag) { 3579 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3580 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3581 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3582 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3583 } 3584 } 3585 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3586 } else { 3587 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3588 } 3589 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3590 3591 /* propagate symmetry info to coarse matrix */ 3592 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3593 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3594 3595 /* set operators */ 3596 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3597 if (pcbddc->dbg_flag) { 3598 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3599 } 3600 } else { /* processes non partecipating to coarse solver (if any) */ 3601 coarse_mat = 0; 3602 } 3603 ierr = PetscFree(isarray);CHKERRQ(ierr); 3604 #if 0 3605 { 3606 PetscViewer viewer; 3607 char filename[256]; 3608 sprintf(filename,"coarse_mat.m"); 3609 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3610 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3611 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3612 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3613 } 3614 #endif 3615 3616 /* Compute coarse null space (special handling by BDDC only) */ 3617 if (pcbddc->NullSpace) { 3618 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3619 } 3620 3621 if (pcbddc->coarse_ksp) { 3622 Vec crhs,csol; 3623 PetscBool ispreonly; 3624 if (CoarseNullSpace) { 3625 if (isbddc) { 3626 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3627 } else { 3628 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3629 } 3630 } 3631 /* setup coarse ksp */ 3632 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3633 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3634 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3635 /* hack */ 3636 if (!csol) { 3637 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3638 } 3639 if (!crhs) { 3640 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3641 } 3642 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3643 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3644 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3645 KSP check_ksp; 3646 KSPType check_ksp_type; 3647 PC check_pc; 3648 Vec check_vec,coarse_vec; 3649 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3650 PetscInt its; 3651 PetscBool compute_eigs; 3652 PetscReal *eigs_r,*eigs_c; 3653 PetscInt neigs; 3654 const char *prefix; 3655 3656 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3657 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3658 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3659 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3660 if (ispreonly) { 3661 check_ksp_type = KSPPREONLY; 3662 compute_eigs = PETSC_FALSE; 3663 } else { 3664 check_ksp_type = KSPGMRES; 3665 compute_eigs = PETSC_TRUE; 3666 } 3667 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3668 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3669 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3670 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3671 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3672 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3673 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3674 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3675 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3676 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3677 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3678 /* create random vec */ 3679 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3680 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3681 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3682 if (CoarseNullSpace) { 3683 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3684 } 3685 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3686 /* solve coarse problem */ 3687 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3688 if (CoarseNullSpace) { 3689 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3690 } 3691 /* set eigenvalue estimation if preonly has not been requested */ 3692 if (compute_eigs) { 3693 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3694 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3695 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3696 lambda_max = eigs_r[neigs-1]; 3697 lambda_min = eigs_r[0]; 3698 if (pcbddc->use_coarse_estimates) { 3699 if (lambda_max>lambda_min) { 3700 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3701 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3702 } 3703 } 3704 } 3705 3706 /* check coarse problem residual error */ 3707 if (pcbddc->dbg_flag) { 3708 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3709 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3710 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3711 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3712 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3713 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3714 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3715 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3716 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3717 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3718 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3719 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3720 if (compute_eigs) { 3721 PetscReal lambda_max_s,lambda_min_s; 3722 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3723 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3724 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3725 for (i=0;i<neigs;i++) { 3726 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3727 } 3728 } 3729 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3730 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3731 } 3732 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3733 if (compute_eigs) { 3734 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3735 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3736 } 3737 } 3738 } 3739 /* print additional info */ 3740 if (pcbddc->dbg_flag) { 3741 /* waits until all processes reaches this point */ 3742 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3743 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3744 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3745 } 3746 3747 /* free memory */ 3748 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3749 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3750 PetscFunctionReturn(0); 3751 } 3752 3753 #undef __FUNCT__ 3754 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3755 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3756 { 3757 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3758 PC_IS* pcis = (PC_IS*)pc->data; 3759 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3760 PetscInt i,coarse_size; 3761 PetscInt *local_primal_indices; 3762 PetscErrorCode ierr; 3763 3764 PetscFunctionBegin; 3765 /* Compute global number of coarse dofs */ 3766 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3767 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3768 } 3769 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3770 3771 /* check numbering */ 3772 if (pcbddc->dbg_flag) { 3773 PetscScalar coarsesum,*array; 3774 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 3775 3776 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3777 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3778 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3779 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3780 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3781 for (i=0;i<pcbddc->local_primal_size;i++) { 3782 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3783 } 3784 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3785 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3786 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3787 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3788 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3789 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3790 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3791 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3792 for (i=0;i<pcis->n;i++) { 3793 if (array[i] == 1.0) { 3794 set_error = PETSC_TRUE; 3795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3796 } 3797 } 3798 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3799 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3800 for (i=0;i<pcis->n;i++) { 3801 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3802 } 3803 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3804 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3805 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3806 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3807 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3808 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3809 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 3810 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3811 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3812 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3813 for (i=0;i<pcbddc->local_primal_size;i++) { 3814 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3815 } 3816 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3817 } 3818 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3819 if (set_error_reduced) { 3820 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 3821 } 3822 } 3823 /* get back data */ 3824 *coarse_size_n = coarse_size; 3825 *local_primal_indices_n = local_primal_indices; 3826 PetscFunctionReturn(0); 3827 } 3828 3829 #undef __FUNCT__ 3830 #define __FUNCT__ "PCBDDCGlobalToLocal" 3831 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 3832 { 3833 IS localis_t; 3834 PetscInt i,lsize,*idxs,n; 3835 PetscScalar *vals; 3836 PetscErrorCode ierr; 3837 3838 PetscFunctionBegin; 3839 /* get indices in local ordering exploiting local to global map */ 3840 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3841 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3842 for (i=0;i<lsize;i++) vals[i] = 1.0; 3843 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3844 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 3845 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 3846 if (idxs) { /* multilevel guard */ 3847 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3848 } 3849 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 3850 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3851 ierr = PetscFree(vals);CHKERRQ(ierr); 3852 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 3853 /* now compute set in local ordering */ 3854 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3855 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3856 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 3857 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 3858 for (i=0,lsize=0;i<n;i++) { 3859 if (PetscRealPart(vals[i]) > 0.5) { 3860 lsize++; 3861 } 3862 } 3863 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3864 for (i=0,lsize=0;i<n;i++) { 3865 if (PetscRealPart(vals[i]) > 0.5) { 3866 idxs[lsize++] = i; 3867 } 3868 } 3869 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 3870 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3871 *localis = localis_t; 3872 PetscFunctionReturn(0); 3873 } 3874