1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 111 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 112 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 118 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 119 { 120 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 121 PC_IS *pcis = (PC_IS*)pc->data; 122 VecType impVecType; 123 PetscInt n_constraints,n_R,old_size; 124 PetscErrorCode ierr; 125 126 PetscFunctionBegin; 127 if (!pcbddc->ConstraintMatrix) { 128 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 129 } 130 /* get sizes */ 131 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 132 n_R = pcis->n-pcbddc->n_actual_vertices; 133 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 134 /* local work vectors (try to avoid unneeded work)*/ 135 /* R nodes */ 136 old_size = -1; 137 if (pcbddc->vec1_R) { 138 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 139 } 140 if (n_R != old_size) { 141 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 143 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 144 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 145 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 146 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 147 } 148 /* local primal dofs */ 149 old_size = -1; 150 if (pcbddc->vec1_P) { 151 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 152 } 153 if (pcbddc->local_primal_size != old_size) { 154 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 156 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 157 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 158 } 159 /* local explicit constraints */ 160 old_size = -1; 161 if (pcbddc->vec1_C) { 162 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 163 } 164 if (n_constraints && n_constraints != old_size) { 165 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 167 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 168 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "PCBDDCSetUpCorrection" 175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 176 { 177 PetscErrorCode ierr; 178 /* pointers to pcis and pcbddc */ 179 PC_IS* pcis = (PC_IS*)pc->data; 180 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 181 /* submatrices of local problem */ 182 Mat A_RV,A_VR,A_VV; 183 /* working matrices */ 184 Mat M1,M2,M3,C_CR; 185 /* working vectors */ 186 Vec vec1_C,vec2_C,vec1_V,vec2_V; 187 /* additional working stuff */ 188 IS is_aux; 189 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 190 const PetscScalar *array,*row_cmat_values; 191 const PetscInt *row_cmat_indices,*idx_R_local; 192 PetscInt *idx_V_B,*auxindices; 193 PetscInt n_vertices,n_constraints,size_of_constraint; 194 PetscInt i,j,n_R,n_D,n_B; 195 PetscBool unsymmetric_check; 196 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 197 MatType impMatType; 198 /* some shortcuts to scalars */ 199 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 200 /* for debugging purposes */ 201 PetscReal *coarsefunctions_errors,*constraints_errors; 202 203 PetscFunctionBegin; 204 /* get number of vertices (corners plus constraints with change of basis) 205 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 206 n_vertices = pcbddc->n_actual_vertices; 207 n_constraints = pcbddc->local_primal_size-n_vertices; 208 /* Set Non-overlapping dimensions */ 209 n_B = pcis->n_B; n_D = pcis->n - n_B; 210 n_R = pcis->n-n_vertices; 211 212 /* Set types for local objects needed by BDDC precondtioner */ 213 impMatType = MATSEQDENSE; 214 215 /* Allocating some extra storage just to be safe */ 216 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 217 for (i=0;i<pcis->n;i++) auxindices[i]=i; 218 219 /* vertices in boundary numbering */ 220 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 221 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 222 if (i != n_vertices) { 223 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 224 } 225 226 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 227 if (n_constraints) { 228 /* see if we can save some allocations */ 229 if (pcbddc->local_auxmat2) { 230 PetscInt on_R,on_constraints; 231 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 232 if (on_R != n_R || on_constraints != n_constraints) { 233 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 234 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 235 } 236 } 237 /* work vectors */ 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 239 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 240 /* auxiliary matrices */ 241 if (!pcbddc->local_auxmat2) { 242 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 243 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 244 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 245 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 246 } 247 248 /* Extract constraints on R nodes: C_{CR} */ 249 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 250 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 251 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 252 253 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 254 for (i=0;i<n_constraints;i++) { 255 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 256 /* Get row of constraint matrix in R numbering */ 257 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 259 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 260 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 261 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 262 /* Solve for row of constraint matrix in R numbering */ 263 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 264 /* Set values in local_auxmat2 */ 265 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 267 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 268 } 269 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 272 273 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 274 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 275 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 276 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 277 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 278 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 279 ierr = MatSetUp(M1);CHKERRQ(ierr); 280 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 281 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 282 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 283 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 284 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 285 ierr = MatDestroy(&M2);CHKERRQ(ierr); 286 ierr = MatDestroy(&M3);CHKERRQ(ierr); 287 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 288 if (!pcbddc->local_auxmat1) { 289 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } else { 291 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 292 } 293 } 294 295 /* Get submatrices from subdomain matrix */ 296 if (n_vertices) { 297 PetscInt ibs,mbs; 298 PetscBool issbaij; 299 Mat newmat; 300 301 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 302 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 303 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 304 if (ibs != mbs) { /* need to convert to SEQAIJ */ 305 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 308 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 309 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 310 } else { 311 /* this is safe */ 312 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 313 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 314 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 315 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 316 /* which of the two approaches is faster? */ 317 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 319 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 320 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 321 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 322 } else { 323 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 324 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 325 } 326 } 327 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 328 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 329 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 330 } 331 332 /* Matrix of coarse basis functions (local) */ 333 if (pcbddc->coarse_phi_B) { 334 PetscInt on_B,on_primal; 335 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 336 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 337 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 338 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 339 } 340 } 341 if (pcbddc->coarse_phi_D) { 342 PetscInt on_D,on_primal; 343 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 344 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 345 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 346 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 347 } 348 } 349 if (!pcbddc->coarse_phi_B) { 350 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 351 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 352 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 353 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 } 355 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 356 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 357 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 358 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 359 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 } 361 362 if (pcbddc->dbg_flag) { 363 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 364 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 365 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 366 } 367 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 368 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 369 370 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 371 372 /* vertices */ 373 for (i=0;i<n_vertices;i++) { 374 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 375 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 376 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 377 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 378 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 379 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 380 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 381 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 383 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 384 if (n_constraints) { 385 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 386 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 387 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 388 } 389 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 390 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 391 392 /* Set values in coarse basis function and subdomain part of coarse_mat */ 393 /* coarse basis functions */ 394 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 395 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 399 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 400 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 401 if (pcbddc->switch_static || pcbddc->dbg_flag) { 402 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 406 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 407 } 408 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 409 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 412 if (n_constraints) { 413 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 416 } 417 418 /* check */ 419 if (pcbddc->dbg_flag) { 420 /* assemble subdomain vector on local nodes */ 421 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 422 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 425 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 426 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 427 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 428 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 429 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 430 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 433 if (n_constraints) { 434 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 437 } 438 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 441 /* check saddle point solution */ 442 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 444 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 445 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 446 /* shift by the identity matrix */ 447 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 448 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 451 } 452 } 453 454 /* constraints */ 455 for (i=0;i<n_constraints;i++) { 456 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 457 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 458 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 459 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 460 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 461 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 462 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 463 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 464 if (n_vertices) { 465 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 466 } 467 /* Set values in coarse basis function and subdomain part of coarse_mat */ 468 /* coarse basis functions */ 469 j = i+n_vertices; /* don't touch this! */ 470 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 471 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 473 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 475 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 476 if (pcbddc->switch_static || pcbddc->dbg_flag) { 477 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 481 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 482 } 483 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 484 if (n_vertices) { 485 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 487 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 488 } 489 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 492 493 if (pcbddc->dbg_flag) { 494 /* assemble subdomain vector on nodes */ 495 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 496 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 498 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 499 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 500 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 501 /* assemble subdomain vector of lagrange multipliers */ 502 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 503 if (n_vertices) { 504 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 506 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 507 } 508 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 510 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 511 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 513 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 514 /* check saddle point solution */ 515 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 517 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 518 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 519 /* shift by the identity matrix */ 520 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 521 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 523 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 524 } 525 } 526 /* call assembling routines for local coarse basis */ 527 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 if (pcbddc->switch_static || pcbddc->dbg_flag) { 530 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 } 533 534 /* compute other basis functions for non-symmetric problems */ 535 ierr = MatIsSymmetric(pc->pmat,1.e-4,&pcbddc->issym);CHKERRQ(ierr); 536 if (!pcbddc->issym) { 537 if (!pcbddc->coarse_psi_B) { 538 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 539 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 541 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 542 } 543 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 544 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 545 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 546 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 547 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 548 } 549 for (i=0;i<pcbddc->local_primal_size;i++) { 550 if (n_constraints) { 551 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 552 for (j=0;j<n_constraints;j++) { 553 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 554 } 555 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 556 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 557 } 558 if (i<n_vertices) { 559 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 560 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 561 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 562 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 563 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 564 if (n_constraints) { 565 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 566 } 567 } else { 568 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 569 } 570 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 571 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 572 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 574 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 575 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 576 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 577 if (i<n_vertices) { 578 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 579 } 580 if (pcbddc->switch_static || pcbddc->dbg_flag) { 581 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 583 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 584 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 585 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 586 } 587 588 if (pcbddc->dbg_flag) { 589 /* assemble subdomain vector on nodes */ 590 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 591 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 592 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 593 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 594 if (i<n_vertices) { 595 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 596 } 597 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 598 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 599 /* assemble subdomain vector of lagrange multipliers */ 600 for (j=0;j<pcbddc->local_primal_size;j++) { 601 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 602 } 603 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 604 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 605 /* check saddle point solution */ 606 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 608 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 609 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 610 /* shift by the identity matrix */ 611 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 612 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 614 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 615 } 616 } 617 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 if (pcbddc->switch_static || pcbddc->dbg_flag) { 620 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 622 } 623 unsymmetric_check = PETSC_TRUE; 624 } else { /* take references to already computed coarse basis */ 625 unsymmetric_check = PETSC_FALSE; 626 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 627 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 628 if (pcbddc->coarse_phi_D) { 629 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 630 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 631 } 632 } 633 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 634 /* Checking coarse_sub_mat and coarse basis functios */ 635 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 636 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 637 if (pcbddc->dbg_flag) { 638 Mat coarse_sub_mat; 639 Mat AUXMAT,TM1,TM2,TM3,TM4; 640 Mat coarse_phi_D,coarse_phi_B; 641 Mat coarse_psi_D,coarse_psi_B; 642 Mat A_II,A_BB,A_IB,A_BI; 643 MatType checkmattype=MATSEQAIJ; 644 PetscReal real_value; 645 646 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 648 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 649 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 650 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 651 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 652 if (unsymmetric_check) { 653 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 654 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 655 } 656 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 657 658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 660 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 661 if (unsymmetric_check) { 662 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 663 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 664 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 665 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 666 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 667 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 668 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 669 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 670 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 671 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 672 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 674 } else { 675 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 676 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 677 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 } 684 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 686 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 687 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 688 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 689 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 695 for (i=0;i<pcbddc->local_primal_size;i++) { 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 697 } 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 699 for (i=0;i<pcbddc->local_primal_size;i++) { 700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 701 } 702 if (unsymmetric_check) { 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 704 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 706 } 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 708 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 710 } 711 } 712 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 715 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 716 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 719 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 720 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 721 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 722 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 723 if (unsymmetric_check) { 724 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 725 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 726 } 727 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 728 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 729 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 730 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 731 } 732 /* free memory */ 733 if (n_vertices) { 734 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 735 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 737 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 738 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 739 } 740 if (n_constraints) { 741 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 742 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 743 ierr = MatDestroy(&M1);CHKERRQ(ierr); 744 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 745 } 746 ierr = PetscFree(auxindices);CHKERRQ(ierr); 747 /* get back data */ 748 *coarse_submat_vals_n = coarse_submat_vals; 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNCT__ 753 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 754 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 755 { 756 PC_IS* pcis = (PC_IS*)(pc->data); 757 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 758 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 759 PetscBool issbaij,isseqaij; 760 /* manage repeated solves */ 761 MatReuse reuse; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 766 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 767 } 768 /* get mat flags */ 769 reuse = MAT_INITIAL_MATRIX; 770 if (pc->setupcalled) { 771 if (pc->flag == SAME_NONZERO_PATTERN) { 772 reuse = MAT_REUSE_MATRIX; 773 } else { 774 reuse = MAT_INITIAL_MATRIX; 775 } 776 } 777 if (reuse == MAT_INITIAL_MATRIX) { 778 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 779 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 780 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 781 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 783 } 784 785 /* transform local matrices if needed */ 786 if (pcbddc->ChangeOfBasisMatrix) { 787 Mat change_mat_all; 788 PetscScalar *row_cmat_values; 789 PetscInt *row_cmat_indices; 790 PetscInt *nnz,*is_indices,*temp_indices; 791 PetscInt i,j,k,n_D,n_B; 792 793 /* Get Non-overlapping dimensions */ 794 n_B = pcis->n_B; 795 n_D = pcis->n-n_B; 796 797 /* compute nonzero structure of change of basis on all local nodes */ 798 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 799 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 800 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 801 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 802 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 803 k=1; 804 for (i=0;i<n_B;i++) { 805 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 806 nnz[is_indices[i]]=j; 807 if (k < j) k = j; 808 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 809 } 810 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 811 /* assemble change of basis matrix on the whole set of local dofs */ 812 ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr); 813 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 814 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 815 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 816 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 818 for (i=0;i<n_D;i++) { 819 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 822 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 823 for (i=0;i<n_B;i++) { 824 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 825 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 826 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 827 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 828 } 829 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 831 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 832 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 833 if (isseqaij) { 834 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 835 } else { 836 Mat work_mat; 837 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 838 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 839 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 840 } 841 /* 842 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 843 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 844 */ 845 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 846 ierr = PetscFree(nnz);CHKERRQ(ierr); 847 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 848 } else { 849 /* without change of basis, the local matrix is unchanged */ 850 if (!pcbddc->local_mat) { 851 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 852 pcbddc->local_mat = matis->A; 853 } 854 } 855 856 /* get submatrices */ 857 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 858 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 859 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 860 if (!issbaij) { 861 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 862 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 863 } else { 864 Mat newmat; 865 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 866 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 867 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 868 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 869 } 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 875 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 876 { 877 PC_IS* pcis = (PC_IS*)(pc->data); 878 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 879 IS is_aux1,is_aux2; 880 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 881 PetscInt n_vertices,i,j,n_R,n_D,n_B; 882 PetscInt vbs,bs; 883 PetscBT bitmask; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 /* 888 No need to setup local scatters if 889 - primal space is unchanged 890 AND 891 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 892 AND 893 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 894 */ 895 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 896 PetscFunctionReturn(0); 897 } 898 /* destroy old objects */ 899 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 900 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 901 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 902 /* Set Non-overlapping dimensions */ 903 n_B = pcis->n_B; n_D = pcis->n - n_B; 904 n_vertices = pcbddc->n_actual_vertices; 905 /* create auxiliary bitmask */ 906 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 907 for (i=0;i<n_vertices;i++) { 908 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 909 } 910 911 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 912 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 913 for (i=0, n_R=0; i<pcis->n; i++) { 914 if (!PetscBTLookup(bitmask,i)) { 915 idx_R_local[n_R] = i; 916 n_R++; 917 } 918 } 919 920 /* Block code */ 921 vbs = 1; 922 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 923 if (bs>1 && !(n_vertices%bs)) { 924 PetscBool is_blocked = PETSC_TRUE; 925 PetscInt *vary; 926 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 927 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 928 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 929 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 930 for (i=0; i<n_vertices; i++) { 931 if (vary[i]!=0 && vary[i]!=bs) { 932 is_blocked = PETSC_FALSE; 933 break; 934 } 935 } 936 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 937 vbs = bs; 938 for (i=0;i<n_R/vbs;i++) { 939 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 940 } 941 } 942 ierr = PetscFree(vary);CHKERRQ(ierr); 943 } 944 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 945 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 946 947 /* print some info if requested */ 948 if (pcbddc->dbg_flag) { 949 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 951 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 952 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 956 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 957 } 958 959 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 960 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 961 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 962 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 963 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 964 for (i=0; i<n_D; i++) { 965 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 966 } 967 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 968 for (i=0, j=0; i<n_R; i++) { 969 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 970 aux_array1[j++] = i; 971 } 972 } 973 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 974 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 975 for (i=0, j=0; i<n_B; i++) { 976 if (!PetscBTLookup(bitmask,is_indices[i])) { 977 aux_array2[j++] = i; 978 } 979 } 980 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 981 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 982 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 983 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 984 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 985 986 if (pcbddc->switch_static || pcbddc->dbg_flag) { 987 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 988 for (i=0, j=0; i<n_R; i++) { 989 if (PetscBTLookup(bitmask,idx_R_local[i])) { 990 aux_array1[j++] = i; 991 } 992 } 993 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 994 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 995 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 996 } 997 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 998 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1005 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1006 { 1007 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1008 PC_IS *pcis = (PC_IS*)pc->data; 1009 PC pc_temp; 1010 Mat A_RR; 1011 MatReuse reuse; 1012 PetscScalar m_one = -1.0; 1013 PetscReal value; 1014 PetscInt n_D,n_R,ibs,mbs; 1015 PetscBool use_exact,use_exact_reduced,issbaij; 1016 PetscErrorCode ierr; 1017 /* prefixes stuff */ 1018 char dir_prefix[256],neu_prefix[256],str_level[16]; 1019 size_t len; 1020 1021 PetscFunctionBegin; 1022 1023 /* compute prefixes */ 1024 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1025 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1026 if (!pcbddc->current_level) { 1027 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1028 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1029 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1030 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1031 } else { 1032 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1033 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1034 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1035 len -= 15; /* remove "pc_bddc_coarse_" */ 1036 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1037 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1038 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1039 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1040 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1041 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1042 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1043 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1044 } 1045 1046 /* DIRICHLET PROBLEM */ 1047 /* Matrix for Dirichlet problem is pcis->A_II */ 1048 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1049 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1050 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1051 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1052 /* default */ 1053 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1054 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1055 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1056 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1057 if (issbaij) { 1058 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1059 } else { 1060 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1061 } 1062 /* Allow user's customization */ 1063 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1064 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1065 } 1066 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1067 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1068 if (!n_D) { 1069 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1070 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1071 } 1072 /* Set Up KSP for Dirichlet problem of BDDC */ 1073 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1074 /* set ksp_D into pcis data */ 1075 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1076 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1077 pcis->ksp_D = pcbddc->ksp_D; 1078 1079 /* NEUMANN PROBLEM */ 1080 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1081 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1082 if (pcbddc->ksp_R) { /* already created ksp */ 1083 PetscInt nn_R; 1084 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1085 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1086 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1087 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1088 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1089 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1090 reuse = MAT_INITIAL_MATRIX; 1091 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1092 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1093 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1094 reuse = MAT_INITIAL_MATRIX; 1095 } else { /* safe to reuse the matrix */ 1096 reuse = MAT_REUSE_MATRIX; 1097 } 1098 } 1099 /* last check */ 1100 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1101 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1102 reuse = MAT_INITIAL_MATRIX; 1103 } 1104 } else { /* first time, so we need to create the matrix */ 1105 reuse = MAT_INITIAL_MATRIX; 1106 } 1107 /* extract A_RR */ 1108 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1109 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1110 if (ibs != mbs) { 1111 Mat newmat; 1112 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1113 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1114 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1115 } else { 1116 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1117 } 1118 if (!pcbddc->ksp_R) { /* create object if not present */ 1119 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1120 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1121 /* default */ 1122 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1123 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1124 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1125 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1126 if (issbaij) { 1127 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1128 } else { 1129 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1130 } 1131 /* Allow user's customization */ 1132 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1133 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1134 } 1135 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1136 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1137 if (!n_R) { 1138 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1139 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1140 } 1141 /* Set Up KSP for Neumann problem of BDDC */ 1142 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1143 1144 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1145 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1146 /* Dirichlet */ 1147 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1148 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1149 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1150 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1151 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1152 /* need to be adapted? */ 1153 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1154 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1155 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1156 /* print info */ 1157 if (pcbddc->dbg_flag) { 1158 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1159 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1162 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1163 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1164 } 1165 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1166 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1167 } 1168 1169 /* Neumann */ 1170 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1171 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1172 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1173 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1174 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1175 /* need to be adapted? */ 1176 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1177 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1178 /* print info */ 1179 if (pcbddc->dbg_flag) { 1180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1182 } 1183 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1184 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1185 } 1186 } 1187 /* free Neumann problem's matrix */ 1188 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1194 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1195 { 1196 PetscErrorCode ierr; 1197 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1198 1199 PetscFunctionBegin; 1200 if (applytranspose) { 1201 if (pcbddc->local_auxmat1) { 1202 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1203 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1204 } 1205 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1206 } else { 1207 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1208 if (pcbddc->local_auxmat1) { 1209 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1210 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1211 } 1212 } 1213 PetscFunctionReturn(0); 1214 } 1215 1216 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1219 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1220 { 1221 PetscErrorCode ierr; 1222 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1223 PC_IS* pcis = (PC_IS*) (pc->data); 1224 const PetscScalar zero = 0.0; 1225 1226 PetscFunctionBegin; 1227 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1228 if (applytranspose) { 1229 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1230 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1231 } else { 1232 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1233 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1234 } 1235 /* start communications from local primal nodes to rhs of coarse solver */ 1236 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1237 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 1240 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1241 /* TODO remove null space when doing multilevel */ 1242 if (pcbddc->coarse_ksp) { 1243 if (applytranspose) { 1244 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1245 } else { 1246 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1247 } 1248 } 1249 /* start communications from coarse solver solution to local primal nodes */ 1250 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1251 1252 /* Local solution on R nodes */ 1253 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1254 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1255 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1256 if (pcbddc->switch_static) { 1257 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1258 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1259 } 1260 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1261 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1262 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1263 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 if (pcbddc->switch_static) { 1265 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 } 1268 1269 /* complete communications from coarse sol to local primal nodes */ 1270 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1271 1272 /* Sum contributions from two levels */ 1273 if (applytranspose) { 1274 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1275 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1276 } else { 1277 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1278 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1279 } 1280 PetscFunctionReturn(0); 1281 } 1282 1283 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1286 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1287 { 1288 PetscErrorCode ierr; 1289 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1290 PetscScalar *array,*array2; 1291 Vec from,to; 1292 1293 PetscFunctionBegin; 1294 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1295 from = pcbddc->coarse_vec; 1296 to = pcbddc->vec1_P; 1297 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1298 Vec tvec; 1299 PetscInt lsize; 1300 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1301 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1302 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1303 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1304 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1305 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1306 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1307 } 1308 } else { /* from local to global -> put data in coarse right hand side */ 1309 from = pcbddc->vec1_P; 1310 to = pcbddc->coarse_vec; 1311 } 1312 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1318 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1319 { 1320 PetscErrorCode ierr; 1321 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1322 PetscScalar *array,*array2; 1323 Vec from,to; 1324 1325 PetscFunctionBegin; 1326 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1327 from = pcbddc->coarse_vec; 1328 to = pcbddc->vec1_P; 1329 } else { /* from local to global -> put data in coarse right hand side */ 1330 from = pcbddc->vec1_P; 1331 to = pcbddc->coarse_vec; 1332 } 1333 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1334 if (smode == SCATTER_FORWARD) { 1335 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1336 Vec tvec; 1337 PetscInt lsize; 1338 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1339 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1340 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1341 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1342 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1343 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1344 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1345 } 1346 } 1347 PetscFunctionReturn(0); 1348 } 1349 1350 /* uncomment for testing purposes */ 1351 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1354 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1355 { 1356 PetscErrorCode ierr; 1357 PC_IS* pcis = (PC_IS*)(pc->data); 1358 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1359 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1360 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1361 MatType impMatType=MATSEQAIJ; 1362 /* one and zero */ 1363 PetscScalar one=1.0,zero=0.0; 1364 /* space to store constraints and their local indices */ 1365 PetscScalar *temp_quadrature_constraint; 1366 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1367 /* iterators */ 1368 PetscInt i,j,k,total_counts,temp_start_ptr; 1369 /* stuff to store connected components stored in pcbddc->mat_graph */ 1370 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1371 PetscInt n_ISForFaces,n_ISForEdges; 1372 /* near null space stuff */ 1373 MatNullSpace nearnullsp; 1374 const Vec *nearnullvecs; 1375 Vec *localnearnullsp; 1376 PetscBool nnsp_has_cnst; 1377 PetscInt nnsp_size; 1378 PetscScalar *array; 1379 /* BLAS integers */ 1380 PetscBLASInt lwork,lierr; 1381 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1382 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1383 /* LAPACK working arrays for SVD or POD */ 1384 PetscBool skip_lapack; 1385 PetscScalar *work; 1386 PetscReal *singular_vals; 1387 #if defined(PETSC_USE_COMPLEX) 1388 PetscReal *rwork; 1389 #endif 1390 #if defined(PETSC_MISSING_LAPACK_GESVD) 1391 PetscBLASInt Blas_one_2=1; 1392 PetscScalar *temp_basis,*correlation_mat; 1393 #else 1394 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1395 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1396 #endif 1397 /* reuse */ 1398 PetscInt olocal_primal_size; 1399 PetscInt *oprimal_indices_local_idxs; 1400 /* change of basis */ 1401 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1402 PetscBool boolforchange,qr_needed; 1403 PetscBT touched,change_basis,qr_needed_idx; 1404 /* auxiliary stuff */ 1405 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1406 /* some quantities */ 1407 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1408 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1409 1410 1411 PetscFunctionBegin; 1412 /* Destroy Mat objects computed previously */ 1413 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1414 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1415 /* Get index sets for faces, edges and vertices from graph */ 1416 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1417 pcbddc->use_vertices = PETSC_TRUE; 1418 } 1419 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1420 /* HACKS (the two following code branches) */ 1421 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1422 pcbddc->use_change_of_basis = PETSC_TRUE; 1423 pcbddc->use_change_on_faces = PETSC_FALSE; 1424 } 1425 if (pcbddc->NullSpace) { 1426 /* use_change_of_basis should be consistent among processors */ 1427 PetscBool tbool = pcbddc->use_change_of_basis; 1428 ierr = MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1429 } 1430 /* print some info */ 1431 if (pcbddc->dbg_flag) { 1432 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1433 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1434 i = 0; 1435 if (ISForVertices) { 1436 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1437 } 1438 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1439 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1440 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1441 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1442 } 1443 /* check if near null space is attached to global mat */ 1444 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1445 if (nearnullsp) { 1446 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1447 /* remove any stored info */ 1448 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1449 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1450 /* store information for BDDC solver reuse */ 1451 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1452 pcbddc->onearnullspace = nearnullsp; 1453 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1454 for (i=0;i<nnsp_size;i++) { 1455 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1456 } 1457 } else { /* if near null space is not provided BDDC uses constants by default */ 1458 nnsp_size = 0; 1459 nnsp_has_cnst = PETSC_TRUE; 1460 } 1461 /* get max number of constraints on a single cc */ 1462 max_constraints = nnsp_size; 1463 if (nnsp_has_cnst) max_constraints++; 1464 1465 /* 1466 Evaluate maximum storage size needed by the procedure 1467 - temp_indices will contain start index of each constraint stored as follows 1468 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1469 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1470 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1471 */ 1472 total_counts = n_ISForFaces+n_ISForEdges; 1473 total_counts *= max_constraints; 1474 n_vertices = 0; 1475 if (ISForVertices) { 1476 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1477 } 1478 total_counts += n_vertices; 1479 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1480 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1481 total_counts = 0; 1482 max_size_of_constraint = 0; 1483 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1484 if (i<n_ISForEdges) { 1485 used_IS = &ISForEdges[i]; 1486 } else { 1487 used_IS = &ISForFaces[i-n_ISForEdges]; 1488 } 1489 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1490 total_counts += j; 1491 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1492 } 1493 total_counts *= max_constraints; 1494 total_counts += n_vertices; 1495 ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr); 1496 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr); 1497 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1498 /* get local part of global near null space vectors */ 1499 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1500 for (k=0;k<nnsp_size;k++) { 1501 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1502 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1504 } 1505 1506 /* whether or not to skip lapack calls */ 1507 skip_lapack = PETSC_TRUE; 1508 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1509 1510 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1511 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1512 PetscScalar temp_work; 1513 #if defined(PETSC_MISSING_LAPACK_GESVD) 1514 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1515 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1516 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1517 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1518 #if defined(PETSC_USE_COMPLEX) 1519 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1520 #endif 1521 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1522 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1523 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1524 lwork = -1; 1525 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1526 #if !defined(PETSC_USE_COMPLEX) 1527 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1528 #else 1529 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1530 #endif 1531 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1532 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1533 #else /* on missing GESVD */ 1534 /* SVD */ 1535 PetscInt max_n,min_n; 1536 max_n = max_size_of_constraint; 1537 min_n = max_constraints; 1538 if (max_size_of_constraint < max_constraints) { 1539 min_n = max_size_of_constraint; 1540 max_n = max_constraints; 1541 } 1542 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1543 #if defined(PETSC_USE_COMPLEX) 1544 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1545 #endif 1546 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1547 lwork = -1; 1548 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1549 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1550 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1551 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1552 #if !defined(PETSC_USE_COMPLEX) 1553 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1554 #else 1555 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1556 #endif 1557 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1558 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1559 #endif /* on missing GESVD */ 1560 /* Allocate optimal workspace */ 1561 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1562 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1563 } 1564 /* Now we can loop on constraining sets */ 1565 total_counts = 0; 1566 temp_indices[0] = 0; 1567 /* vertices */ 1568 if (ISForVertices) { 1569 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1570 if (nnsp_has_cnst) { /* consider all vertices */ 1571 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1572 for (i=0;i<n_vertices;i++) { 1573 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1574 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1575 total_counts++; 1576 } 1577 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1578 PetscBool used_vertex; 1579 for (i=0;i<n_vertices;i++) { 1580 used_vertex = PETSC_FALSE; 1581 k = 0; 1582 while (!used_vertex && k<nnsp_size) { 1583 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1584 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1585 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1586 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1587 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1588 total_counts++; 1589 used_vertex = PETSC_TRUE; 1590 } 1591 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1592 k++; 1593 } 1594 } 1595 } 1596 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1597 n_vertices = total_counts; 1598 } 1599 1600 /* edges and faces */ 1601 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1602 if (i<n_ISForEdges) { 1603 used_IS = &ISForEdges[i]; 1604 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1605 } else { 1606 used_IS = &ISForFaces[i-n_ISForEdges]; 1607 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1608 } 1609 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1610 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1611 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1612 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1613 /* change of basis should not be performed on local periodic nodes */ 1614 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1615 if (nnsp_has_cnst) { 1616 PetscScalar quad_value; 1617 temp_constraints++; 1618 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1619 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1620 for (j=0;j<size_of_constraint;j++) { 1621 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1622 } 1623 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1624 total_counts++; 1625 } 1626 for (k=0;k<nnsp_size;k++) { 1627 PetscReal real_value; 1628 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1629 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1630 for (j=0;j<size_of_constraint;j++) { 1631 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1632 } 1633 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1634 /* check if array is null on the connected component */ 1635 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1636 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1637 if (real_value > 0.0) { /* keep indices and values */ 1638 temp_constraints++; 1639 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1640 total_counts++; 1641 } 1642 } 1643 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1644 valid_constraints = temp_constraints; 1645 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1646 if (!pcbddc->use_nnsp_true && temp_constraints) { 1647 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1648 1649 #if defined(PETSC_MISSING_LAPACK_GESVD) 1650 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1651 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1652 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1653 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1654 from that computed using LAPACKgesvd 1655 -> This is due to a different computation of eigenvectors in LAPACKheev 1656 -> The quality of the POD-computed basis will be the same */ 1657 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1658 /* Store upper triangular part of correlation matrix */ 1659 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1660 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1661 for (j=0;j<temp_constraints;j++) { 1662 for (k=0;k<j+1;k++) { 1663 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1664 } 1665 } 1666 /* compute eigenvalues and eigenvectors of correlation matrix */ 1667 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1668 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1669 #if !defined(PETSC_USE_COMPLEX) 1670 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1671 #else 1672 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1673 #endif 1674 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1675 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1676 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1677 j=0; 1678 while (j < temp_constraints && singular_vals[j] < tol) j++; 1679 total_counts=total_counts-j; 1680 valid_constraints = temp_constraints-j; 1681 /* scale and copy POD basis into used quadrature memory */ 1682 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1683 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1684 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1685 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1686 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1687 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1688 if (j<temp_constraints) { 1689 PetscInt ii; 1690 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1691 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1692 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1693 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1694 for (k=0;k<temp_constraints-j;k++) { 1695 for (ii=0;ii<size_of_constraint;ii++) { 1696 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1697 } 1698 } 1699 } 1700 #else /* on missing GESVD */ 1701 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1702 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1703 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1704 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1705 #if !defined(PETSC_USE_COMPLEX) 1706 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1707 #else 1708 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1709 #endif 1710 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1711 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1712 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1713 k = temp_constraints; 1714 if (k > size_of_constraint) k = size_of_constraint; 1715 j = 0; 1716 while (j < k && singular_vals[k-j-1] < tol) j++; 1717 total_counts = total_counts-temp_constraints+k-j; 1718 valid_constraints = k-j; 1719 #endif /* on missing GESVD */ 1720 } 1721 /* setting change_of_basis flag is safe now */ 1722 if (boolforchange) { 1723 for (j=0;j<valid_constraints;j++) { 1724 PetscBTSet(change_basis,total_counts-j-1); 1725 } 1726 } 1727 } 1728 /* free index sets of faces, edges and vertices */ 1729 for (i=0;i<n_ISForFaces;i++) { 1730 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1731 } 1732 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1733 for (i=0;i<n_ISForEdges;i++) { 1734 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1735 } 1736 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1737 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1738 /* map temp_indices_to_constraint in boundary numbering */ 1739 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1740 if (i != temp_indices[total_counts]) { 1741 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1742 } 1743 1744 /* free workspace */ 1745 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1746 ierr = PetscFree(work);CHKERRQ(ierr); 1747 #if defined(PETSC_USE_COMPLEX) 1748 ierr = PetscFree(rwork);CHKERRQ(ierr); 1749 #endif 1750 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1751 #if defined(PETSC_MISSING_LAPACK_GESVD) 1752 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1753 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1754 #endif 1755 } 1756 for (k=0;k<nnsp_size;k++) { 1757 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1758 } 1759 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1760 1761 /* set quantities in pcbddc data structure and store previous primal size */ 1762 /* n_vertices defines the number of subdomain corners in the primal space */ 1763 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1764 olocal_primal_size = pcbddc->local_primal_size; 1765 pcbddc->local_primal_size = total_counts; 1766 pcbddc->n_vertices = n_vertices; 1767 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1768 1769 /* Create constraint matrix */ 1770 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1771 /* so we need to set it up properly either with or without change of basis */ 1772 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1773 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1774 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1775 /* array to compute a local numbering of constraints : vertices first then constraints */ 1776 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1777 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1778 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1779 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1780 /* auxiliary stuff for basis change */ 1781 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1782 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1783 1784 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1785 total_primal_vertices=0; 1786 for (i=0;i<pcbddc->local_primal_size;i++) { 1787 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1788 if (size_of_constraint == 1) { 1789 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1790 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1791 aux_primal_minloc[total_primal_vertices]=0; 1792 total_primal_vertices++; 1793 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1794 PetscInt min_loc,min_index; 1795 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1796 /* find first untouched local node */ 1797 k = 0; 1798 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1799 min_index = global_indices[k]; 1800 min_loc = k; 1801 /* search the minimum among global nodes already untouched on the cc */ 1802 for (k=1;k<size_of_constraint;k++) { 1803 /* there can be more than one constraint on a single connected component */ 1804 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1805 min_index = global_indices[k]; 1806 min_loc = k; 1807 } 1808 } 1809 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1810 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1811 aux_primal_minloc[total_primal_vertices]=min_loc; 1812 total_primal_vertices++; 1813 } 1814 } 1815 /* determine if a QR strategy is needed for change of basis */ 1816 qr_needed = PETSC_FALSE; 1817 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1818 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1819 if (PetscBTLookup(change_basis,i)) { 1820 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1821 j = 0; 1822 for (k=0;k<size_of_constraint;k++) { 1823 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1824 j++; 1825 } 1826 } 1827 /* found more than one primal dof on the cc */ 1828 if (j > 1) { 1829 PetscBTSet(qr_needed_idx,i); 1830 qr_needed = PETSC_TRUE; 1831 } 1832 } 1833 } 1834 /* free workspace */ 1835 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1836 1837 /* permute indices in order to have a sorted set of vertices */ 1838 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1839 1840 /* nonzero structure of constraint matrix */ 1841 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1842 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1843 j=total_primal_vertices; 1844 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1845 if (!PetscBTLookup(change_basis,i)) { 1846 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1847 j++; 1848 } 1849 } 1850 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1851 ierr = PetscFree(nnz);CHKERRQ(ierr); 1852 /* set values in constraint matrix */ 1853 for (i=0;i<total_primal_vertices;i++) { 1854 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1855 } 1856 total_counts = total_primal_vertices; 1857 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1858 if (!PetscBTLookup(change_basis,i)) { 1859 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1860 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1861 total_counts++; 1862 } 1863 } 1864 /* assembling */ 1865 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1866 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1867 /* 1868 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1869 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1870 */ 1871 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1872 if (pcbddc->use_change_of_basis) { 1873 /* dual and primal dofs on a single cc */ 1874 PetscInt dual_dofs,primal_dofs; 1875 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1876 PetscInt primal_counter; 1877 /* working stuff for GEQRF */ 1878 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1879 PetscBLASInt lqr_work; 1880 /* working stuff for UNGQR */ 1881 PetscScalar *gqr_work,lgqr_work_t; 1882 PetscBLASInt lgqr_work; 1883 /* working stuff for TRTRS */ 1884 PetscScalar *trs_rhs; 1885 PetscBLASInt Blas_NRHS; 1886 /* pointers for values insertion into change of basis matrix */ 1887 PetscInt *start_rows,*start_cols; 1888 PetscScalar *start_vals; 1889 /* working stuff for values insertion */ 1890 PetscBT is_primal; 1891 1892 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1893 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1894 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1895 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1896 /* work arrays */ 1897 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1898 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1899 /* nonzeros per row */ 1900 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1901 if (PetscBTLookup(change_basis,i)) { 1902 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1903 if (PetscBTLookup(qr_needed_idx,i)) { 1904 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1905 } else { 1906 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1907 /* get local primal index on the cc */ 1908 j = 0; 1909 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1910 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1911 } 1912 } 1913 } 1914 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1915 ierr = PetscFree(nnz);CHKERRQ(ierr); 1916 /* Set initial identity in the matrix */ 1917 for (i=0;i<pcis->n_B;i++) { 1918 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1919 } 1920 1921 if (pcbddc->dbg_flag) { 1922 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1923 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1924 } 1925 1926 1927 /* Now we loop on the constraints which need a change of basis */ 1928 /* 1929 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1930 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1931 1932 Basic blocks of change of basis matrix T computed 1933 1934 - Using the following block transformation if there is only a primal dof on the cc 1935 (in the example, primal dof is the last one of the edge in LOCAL ordering 1936 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1937 | 1 0 ... 0 1 | 1938 | 0 1 ... 0 1 | 1939 | ... | 1940 | 0 ... 1 1 | 1941 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1942 1943 - via QR decomposition of constraints otherwise 1944 */ 1945 if (qr_needed) { 1946 /* space to store Q */ 1947 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 1948 /* first we issue queries for optimal work */ 1949 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1950 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1951 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1952 lqr_work = -1; 1953 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1954 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1955 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1956 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 1957 lgqr_work = -1; 1958 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1959 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1960 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1961 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1962 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1963 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1964 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1965 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1966 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 1967 /* array to store scaling factors for reflectors */ 1968 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 1969 /* array to store rhs and solution of triangular solver */ 1970 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 1971 /* allocating workspace for check */ 1972 if (pcbddc->dbg_flag) { 1973 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 1974 } 1975 } 1976 /* array to store whether a node is primal or not */ 1977 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1978 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 1979 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1980 if (i != total_primal_vertices) { 1981 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1982 } 1983 for (i=0;i<total_primal_vertices;i++) { 1984 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1985 } 1986 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1987 1988 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1989 /* -> using implicit ordering contained in temp_indices data */ 1990 total_counts = pcbddc->n_vertices; 1991 primal_counter = total_counts; 1992 while (total_counts<pcbddc->local_primal_size) { 1993 primal_dofs = 1; 1994 if (PetscBTLookup(change_basis,total_counts)) { 1995 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1996 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1997 primal_dofs++; 1998 } 1999 /* get constraint info */ 2000 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2001 dual_dofs = size_of_constraint-primal_dofs; 2002 2003 if (pcbddc->dbg_flag) { 2004 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2005 } 2006 2007 if (primal_dofs > 1) { /* QR */ 2008 2009 /* copy quadrature constraints for change of basis check */ 2010 if (pcbddc->dbg_flag) { 2011 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2012 } 2013 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2014 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2015 2016 /* compute QR decomposition of constraints */ 2017 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2018 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2020 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2021 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2022 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2023 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2024 2025 /* explictly compute R^-T */ 2026 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2027 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2028 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2029 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2030 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2031 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2032 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2033 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2034 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2035 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2036 2037 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2038 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2039 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2040 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2041 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2042 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2043 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2044 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2045 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2046 2047 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2048 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2049 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2050 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2051 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2052 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2053 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2054 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2056 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2057 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2058 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2059 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2060 2061 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2062 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2063 /* insert cols for primal dofs */ 2064 for (j=0;j<primal_dofs;j++) { 2065 start_vals = &qr_basis[j*size_of_constraint]; 2066 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2067 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2068 } 2069 /* insert cols for dual dofs */ 2070 for (j=0,k=0;j<dual_dofs;k++) { 2071 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2072 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2073 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2074 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2075 j++; 2076 } 2077 } 2078 2079 /* check change of basis */ 2080 if (pcbddc->dbg_flag) { 2081 PetscInt ii,jj; 2082 PetscBool valid_qr=PETSC_TRUE; 2083 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2084 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2085 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2087 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2088 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2089 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2090 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2091 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2092 for (jj=0;jj<size_of_constraint;jj++) { 2093 for (ii=0;ii<primal_dofs;ii++) { 2094 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2095 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2096 } 2097 } 2098 if (!valid_qr) { 2099 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2100 for (jj=0;jj<size_of_constraint;jj++) { 2101 for (ii=0;ii<primal_dofs;ii++) { 2102 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2103 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2104 } 2105 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2106 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2107 } 2108 } 2109 } 2110 } else { 2111 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2112 } 2113 } 2114 } else { /* simple transformation block */ 2115 PetscInt row,col; 2116 PetscScalar val; 2117 for (j=0;j<size_of_constraint;j++) { 2118 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2119 if (!PetscBTLookup(is_primal,row)) { 2120 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2121 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2122 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2123 } else { 2124 for (k=0;k<size_of_constraint;k++) { 2125 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2126 if (row != col) { 2127 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2128 } else { 2129 val = 1.0; 2130 } 2131 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2132 } 2133 } 2134 } 2135 if (pcbddc->dbg_flag) { 2136 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2137 } 2138 } 2139 /* increment primal counter */ 2140 primal_counter += primal_dofs; 2141 } else { 2142 if (pcbddc->dbg_flag) { 2143 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2144 } 2145 } 2146 /* increment constraint counter total_counts */ 2147 total_counts += primal_dofs; 2148 } 2149 2150 /* free workspace */ 2151 if (qr_needed) { 2152 if (pcbddc->dbg_flag) { 2153 ierr = PetscFree(work);CHKERRQ(ierr); 2154 } 2155 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2156 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2157 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2158 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2159 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2160 } 2161 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2162 /* assembling */ 2163 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2164 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2165 /* 2166 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2167 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2168 */ 2169 } 2170 /* Change of basis as provided by the user in local numbering (internal and boundary) or boundary only */ 2171 if (pcbddc->user_ChangeOfBasisMatrix) { 2172 PetscInt rows,cols; 2173 ierr = MatGetSize(pcbddc->user_ChangeOfBasisMatrix,&rows,&cols);CHKERRQ(ierr); 2174 if (rows == pcis->n && cols == pcis->n) { 2175 ierr = MatGetSubMatrix(pcbddc->user_ChangeOfBasisMatrix,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2176 } else { 2177 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2178 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2179 } 2180 } 2181 2182 /* get indices in local ordering for vertices and constraints */ 2183 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2184 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2185 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2186 } 2187 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2188 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2189 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2190 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2191 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2192 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2193 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2194 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2195 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2196 /* set quantities in PCBDDC data struct */ 2197 pcbddc->n_actual_vertices = i; 2198 /* check if a new primal space has been introduced */ 2199 pcbddc->new_primal_space_local = PETSC_TRUE; 2200 if (olocal_primal_size == pcbddc->local_primal_size) { 2201 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2202 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2203 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2204 } 2205 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2206 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2207 2208 /* flush dbg viewer */ 2209 if (pcbddc->dbg_flag) { 2210 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2211 } 2212 2213 /* free workspace */ 2214 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2215 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2216 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2217 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2218 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2219 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2220 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2221 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2222 PetscFunctionReturn(0); 2223 } 2224 2225 #undef __FUNCT__ 2226 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2227 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2228 { 2229 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2230 PC_IS *pcis = (PC_IS*)pc->data; 2231 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2232 PetscInt ierr,i,vertex_size; 2233 PetscViewer viewer=pcbddc->dbg_viewer; 2234 2235 PetscFunctionBegin; 2236 /* Reset previously computed graph */ 2237 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2238 /* Init local Graph struct */ 2239 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2240 2241 /* Check validity of the csr graph passed in by the user */ 2242 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2243 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2244 } 2245 2246 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2247 if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) { 2248 Mat mat_adj; 2249 const PetscInt *xadj,*adjncy; 2250 PetscBool flg_row=PETSC_TRUE; 2251 2252 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2253 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2254 if (!flg_row) { 2255 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2256 } 2257 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2258 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2259 if (!flg_row) { 2260 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2261 } 2262 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2263 } 2264 2265 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2266 vertex_size = 1; 2267 if (pcbddc->user_provided_isfordofs) { 2268 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2269 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2270 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2271 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2272 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2273 } 2274 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2275 pcbddc->n_ISForDofs = 0; 2276 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2277 } 2278 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2279 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2280 } else { 2281 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2282 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2283 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2284 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2285 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2286 } 2287 } 2288 } 2289 2290 /* Setup of Graph */ 2291 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2292 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2293 } 2294 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2295 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2296 } 2297 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2298 2299 /* Graph's connected components analysis */ 2300 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2301 2302 /* print some info to stdout */ 2303 if (pcbddc->dbg_flag) { 2304 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2305 } 2306 2307 /* mark topography has done */ 2308 pcbddc->recompute_topography = PETSC_FALSE; 2309 PetscFunctionReturn(0); 2310 } 2311 2312 #undef __FUNCT__ 2313 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2314 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2315 { 2316 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2317 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2318 PetscErrorCode ierr; 2319 2320 PetscFunctionBegin; 2321 n = 0; 2322 vertices = 0; 2323 if (pcbddc->ConstraintMatrix) { 2324 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2325 for (i=0;i<local_primal_size;i++) { 2326 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2327 if (size_of_constraint == 1) n++; 2328 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2329 } 2330 if (vertices_idx) { 2331 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2332 n = 0; 2333 for (i=0;i<local_primal_size;i++) { 2334 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2335 if (size_of_constraint == 1) { 2336 vertices[n++]=row_cmat_indices[0]; 2337 } 2338 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2339 } 2340 } 2341 } 2342 *n_vertices = n; 2343 if (vertices_idx) *vertices_idx = vertices; 2344 PetscFunctionReturn(0); 2345 } 2346 2347 #undef __FUNCT__ 2348 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2349 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2350 { 2351 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2352 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2353 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2354 PetscBT touched; 2355 PetscErrorCode ierr; 2356 2357 /* This function assumes that the number of local constraints per connected component 2358 is not greater than the number of nodes defined for the connected component 2359 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2360 PetscFunctionBegin; 2361 n = 0; 2362 constraints_index = 0; 2363 if (pcbddc->ConstraintMatrix) { 2364 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2365 max_size_of_constraint = 0; 2366 for (i=0;i<local_primal_size;i++) { 2367 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2368 if (size_of_constraint > 1) { 2369 n++; 2370 } 2371 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2372 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2373 } 2374 if (constraints_idx) { 2375 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2376 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2377 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2378 n = 0; 2379 for (i=0;i<local_primal_size;i++) { 2380 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2381 if (size_of_constraint > 1) { 2382 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2383 /* find first untouched local node */ 2384 j = 0; 2385 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2386 min_index = row_cmat_global_indices[j]; 2387 min_loc = j; 2388 /* search the minimum among nodes not yet touched on the connected component 2389 since there can be more than one constraint on a single cc */ 2390 for (j=1;j<size_of_constraint;j++) { 2391 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2392 min_index = row_cmat_global_indices[j]; 2393 min_loc = j; 2394 } 2395 } 2396 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2397 constraints_index[n++] = row_cmat_indices[min_loc]; 2398 } 2399 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2400 } 2401 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2402 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2403 } 2404 } 2405 *n_constraints = n; 2406 if (constraints_idx) *constraints_idx = constraints_index; 2407 PetscFunctionReturn(0); 2408 } 2409 2410 /* the next two functions has been adapted from pcis.c */ 2411 #undef __FUNCT__ 2412 #define __FUNCT__ "PCBDDCApplySchur" 2413 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2414 { 2415 PetscErrorCode ierr; 2416 PC_IS *pcis = (PC_IS*)(pc->data); 2417 2418 PetscFunctionBegin; 2419 if (!vec2_B) { vec2_B = v; } 2420 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2421 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2422 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2423 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2424 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2425 PetscFunctionReturn(0); 2426 } 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2430 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2431 { 2432 PetscErrorCode ierr; 2433 PC_IS *pcis = (PC_IS*)(pc->data); 2434 2435 PetscFunctionBegin; 2436 if (!vec2_B) { vec2_B = v; } 2437 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2438 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2439 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2440 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2441 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2442 PetscFunctionReturn(0); 2443 } 2444 2445 #undef __FUNCT__ 2446 #define __FUNCT__ "PCBDDCSubsetNumbering" 2447 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2448 { 2449 Vec local_vec,global_vec; 2450 IS seqis,paris; 2451 VecScatter scatter_ctx; 2452 PetscScalar *array; 2453 PetscInt *temp_global_dofs; 2454 PetscScalar globalsum; 2455 PetscInt i,j,s; 2456 PetscInt nlocals,first_index,old_index,max_local; 2457 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2458 PetscMPIInt *dof_sizes,*dof_displs; 2459 PetscBool first_found; 2460 PetscErrorCode ierr; 2461 2462 PetscFunctionBegin; 2463 /* mpi buffers */ 2464 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2465 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2466 j = ( !rank_prec_comm ? size_prec_comm : 0); 2467 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2468 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2469 /* get maximum size of subset */ 2470 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2471 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2472 max_local = 0; 2473 for (i=0;i<n_local_dofs;i++) { 2474 if (max_local < temp_global_dofs[i] ) { 2475 max_local = temp_global_dofs[i]; 2476 } 2477 } 2478 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2479 max_global++; 2480 max_local = 0; 2481 for (i=0;i<n_local_dofs;i++) { 2482 if (max_local < local_dofs[i] ) { 2483 max_local = local_dofs[i]; 2484 } 2485 } 2486 max_local++; 2487 /* allocate workspace */ 2488 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2489 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2490 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2491 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2492 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2493 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2494 /* create scatter */ 2495 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2496 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2497 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2498 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2499 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2500 /* init array */ 2501 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2502 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2503 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2504 if (local_dofs_mult) { 2505 for (i=0;i<n_local_dofs;i++) { 2506 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2507 } 2508 } else { 2509 for (i=0;i<n_local_dofs;i++) { 2510 array[local_dofs[i]]=1.0; 2511 } 2512 } 2513 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2514 /* scatter into global vec and get total number of global dofs */ 2515 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2516 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2517 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2518 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2519 /* Fill global_vec with cumulative function for global numbering */ 2520 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2521 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2522 nlocals = 0; 2523 first_index = -1; 2524 first_found = PETSC_FALSE; 2525 for (i=0;i<s;i++) { 2526 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2527 first_found = PETSC_TRUE; 2528 first_index = i; 2529 } 2530 nlocals += (PetscInt)PetscRealPart(array[i]); 2531 } 2532 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2533 if (!rank_prec_comm) { 2534 dof_displs[0]=0; 2535 for (i=1;i<size_prec_comm;i++) { 2536 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2537 } 2538 } 2539 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2540 if (first_found) { 2541 array[first_index] += (PetscScalar)nlocals; 2542 old_index = first_index; 2543 for (i=first_index+1;i<s;i++) { 2544 if (PetscRealPart(array[i]) > 0.1) { 2545 array[i] += array[old_index]; 2546 old_index = i; 2547 } 2548 } 2549 } 2550 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2551 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2552 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2553 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2554 /* get global ordering of local dofs */ 2555 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2556 if (local_dofs_mult) { 2557 for (i=0;i<n_local_dofs;i++) { 2558 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2559 } 2560 } else { 2561 for (i=0;i<n_local_dofs;i++) { 2562 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2563 } 2564 } 2565 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2566 /* free workspace */ 2567 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2568 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2569 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2570 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2571 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2572 /* return pointer to global ordering of local dofs */ 2573 *global_numbering_subset = temp_global_dofs; 2574 PetscFunctionReturn(0); 2575 } 2576 2577 #undef __FUNCT__ 2578 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2579 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2580 { 2581 PetscInt i,j; 2582 PetscScalar *alphas; 2583 PetscErrorCode ierr; 2584 2585 PetscFunctionBegin; 2586 /* this implements stabilized Gram-Schmidt */ 2587 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2588 for (i=0;i<n;i++) { 2589 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2590 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2591 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2592 } 2593 ierr = PetscFree(alphas);CHKERRQ(ierr); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 #undef __FUNCT__ 2598 #define __FUNCT__ "MatISGetSubassemblingPattern" 2599 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2600 { 2601 Mat subdomain_adj; 2602 IS new_ranks,ranks_send_to; 2603 MatPartitioning partitioner; 2604 Mat_IS *matis; 2605 PetscInt n_neighs,*neighs,*n_shared,**shared; 2606 PetscInt prank; 2607 PetscMPIInt size,rank,color; 2608 PetscInt *xadj,*adjncy,*oldranks; 2609 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2610 PetscInt i,j,local_size,threshold=0; 2611 PetscErrorCode ierr; 2612 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2613 PetscSubcomm subcomm; 2614 2615 PetscFunctionBegin; 2616 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2617 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2618 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2619 2620 /* Get info on mapping */ 2621 matis = (Mat_IS*)(mat->data); 2622 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2623 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2624 2625 /* build local CSR graph of subdomains' connectivity */ 2626 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2627 xadj[0] = 0; 2628 xadj[1] = PetscMax(n_neighs-1,0); 2629 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2630 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2631 2632 if (threshold) { 2633 PetscInt* count,min_threshold; 2634 ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr); 2635 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2636 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2637 for (j=0;j<n_shared[i];j++) { 2638 count[shared[i][j]] += 1; 2639 } 2640 } 2641 /* adapt threshold since we dont want to lose significant connections */ 2642 min_threshold = n_neighs; 2643 for (i=1;i<n_neighs;i++) { 2644 for (j=0;j<n_shared[i];j++) { 2645 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2646 } 2647 } 2648 threshold = PetscMax(min_threshold+1,threshold); 2649 xadj[1] = 0; 2650 for (i=1;i<n_neighs;i++) { 2651 for (j=0;j<n_shared[i];j++) { 2652 if (count[shared[i][j]] < threshold) { 2653 adjncy[xadj[1]] = neighs[i]; 2654 adjncy_wgt[xadj[1]] = n_shared[i]; 2655 xadj[1]++; 2656 break; 2657 } 2658 } 2659 } 2660 ierr = PetscFree(count);CHKERRQ(ierr); 2661 } else { 2662 if (xadj[1]) { 2663 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2664 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2665 } 2666 } 2667 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2668 if (use_square) { 2669 for (i=0;i<xadj[1];i++) { 2670 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2671 } 2672 } 2673 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2674 2675 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2676 2677 /* 2678 Restrict work on active processes only. 2679 */ 2680 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2681 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2682 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2683 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2684 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2685 if (color) { 2686 ierr = PetscFree(xadj);CHKERRQ(ierr); 2687 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2688 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2689 } else { 2690 PetscInt coarsening_ratio; 2691 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2692 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2693 prank = rank; 2694 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2695 /* 2696 for (i=0;i<size;i++) { 2697 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2698 } 2699 */ 2700 for (i=0;i<xadj[1];i++) { 2701 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2702 } 2703 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2704 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2705 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2706 2707 /* Partition */ 2708 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2709 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2710 if (use_vwgt) { 2711 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2712 v_wgt[0] = local_size; 2713 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2714 } 2715 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2716 coarsening_ratio = size/n_subdomains; 2717 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2718 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2719 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2720 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2721 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2722 2723 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2724 if (contiguous) { 2725 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2726 } else { 2727 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2728 } 2729 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2730 /* clean up */ 2731 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2732 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2733 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2734 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2735 } 2736 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2737 2738 /* assemble parallel IS for sends */ 2739 i = 1; 2740 if (color) i=0; 2741 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2742 2743 /* get back IS */ 2744 *is_sends = ranks_send_to; 2745 PetscFunctionReturn(0); 2746 } 2747 2748 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2749 2750 #undef __FUNCT__ 2751 #define __FUNCT__ "MatISSubassemble" 2752 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2753 { 2754 Mat local_mat; 2755 Mat_IS *matis; 2756 IS is_sends_internal; 2757 PetscInt rows,cols; 2758 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2759 PetscBool ismatis,isdense,destroy_mat; 2760 ISLocalToGlobalMapping l2gmap; 2761 PetscInt* l2gmap_indices; 2762 const PetscInt* is_indices; 2763 MatType new_local_type; 2764 /* buffers */ 2765 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2766 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2767 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2768 /* MPI */ 2769 MPI_Comm comm,comm_n; 2770 PetscSubcomm subcomm; 2771 PetscMPIInt n_sends,n_recvs,commsize; 2772 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2773 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2774 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2775 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2776 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2777 PetscErrorCode ierr; 2778 2779 PetscFunctionBegin; 2780 /* TODO: add missing checks */ 2781 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2782 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2783 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2784 PetscValidLogicalCollectiveInt(mat,nis,7); 2785 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2786 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2787 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2788 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2789 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2790 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2791 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2792 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2793 PetscInt mrows,mcols,mnrows,mncols; 2794 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2795 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2796 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2797 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2798 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2799 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2800 } 2801 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2802 PetscValidLogicalCollectiveInt(mat,bs,0); 2803 /* prepare IS for sending if not provided */ 2804 if (!is_sends) { 2805 PetscBool pcontig = PETSC_TRUE; 2806 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 2807 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 2808 } else { 2809 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2810 is_sends_internal = is_sends; 2811 } 2812 2813 /* get pointer of MATIS data */ 2814 matis = (Mat_IS*)mat->data; 2815 2816 /* get comm */ 2817 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2818 2819 /* compute number of sends */ 2820 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2821 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2822 2823 /* compute number of receives */ 2824 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2825 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 2826 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2827 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2828 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2829 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2830 ierr = PetscFree(iflags);CHKERRQ(ierr); 2831 2832 /* restrict comm if requested */ 2833 subcomm = 0; 2834 destroy_mat = PETSC_FALSE; 2835 if (restrict_comm) { 2836 PetscMPIInt color,rank,subcommsize; 2837 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2838 color = 0; 2839 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 2840 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2841 subcommsize = commsize - subcommsize; 2842 /* check if reuse has been requested */ 2843 if (reuse == MAT_REUSE_MATRIX) { 2844 if (*mat_n) { 2845 PetscMPIInt subcommsize2; 2846 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 2847 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 2848 comm_n = PetscObjectComm((PetscObject)*mat_n); 2849 } else { 2850 comm_n = PETSC_COMM_SELF; 2851 } 2852 } else { /* MAT_INITIAL_MATRIX */ 2853 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 2854 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 2855 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2856 comm_n = subcomm->comm; 2857 } 2858 /* flag to destroy *mat_n if not significative */ 2859 if (color) destroy_mat = PETSC_TRUE; 2860 } else { 2861 comm_n = comm; 2862 } 2863 2864 /* prepare send/receive buffers */ 2865 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 2866 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2867 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 2868 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2869 if (nis) { 2870 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 2871 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 2872 } 2873 2874 /* Get data from local matrices */ 2875 if (!isdense) { 2876 /* TODO: See below some guidelines on how to prepare the local buffers */ 2877 /* 2878 send_buffer_vals should contain the raw values of the local matrix 2879 send_buffer_idxs should contain: 2880 - MatType_PRIVATE type 2881 - PetscInt size_of_l2gmap 2882 - PetscInt global_row_indices[size_of_l2gmap] 2883 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2884 */ 2885 } else { 2886 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2887 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2888 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 2889 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2890 send_buffer_idxs[1] = i; 2891 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2892 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2893 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2894 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2895 for (i=0;i<n_sends;i++) { 2896 ilengths_vals[is_indices[i]] = len*len; 2897 ilengths_idxs[is_indices[i]] = len+2; 2898 } 2899 } 2900 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2901 /* additional is (if any) */ 2902 if (nis) { 2903 PetscMPIInt psum; 2904 PetscInt j; 2905 for (j=0,psum=0;j<nis;j++) { 2906 PetscInt plen; 2907 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 2908 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 2909 psum += len+1; /* indices + lenght */ 2910 } 2911 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 2912 for (j=0,psum=0;j<nis;j++) { 2913 PetscInt plen; 2914 const PetscInt *is_array_idxs; 2915 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 2916 send_buffer_idxs_is[psum] = plen; 2917 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 2918 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 2919 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 2920 psum += plen+1; /* indices + lenght */ 2921 } 2922 for (i=0;i<n_sends;i++) { 2923 ilengths_idxs_is[is_indices[i]] = psum; 2924 } 2925 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 2926 } 2927 2928 buf_size_idxs = 0; 2929 buf_size_vals = 0; 2930 buf_size_idxs_is = 0; 2931 for (i=0;i<n_recvs;i++) { 2932 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2933 buf_size_vals += (PetscInt)olengths_vals[i]; 2934 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 2935 } 2936 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 2937 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 2938 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 2939 2940 /* get new tags for clean communications */ 2941 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2942 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2943 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 2944 2945 /* allocate for requests */ 2946 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 2947 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 2948 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 2949 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 2950 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 2951 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 2952 2953 /* communications */ 2954 ptr_idxs = recv_buffer_idxs; 2955 ptr_vals = recv_buffer_vals; 2956 ptr_idxs_is = recv_buffer_idxs_is; 2957 for (i=0;i<n_recvs;i++) { 2958 source_dest = onodes[i]; 2959 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2960 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2961 ptr_idxs += olengths_idxs[i]; 2962 ptr_vals += olengths_vals[i]; 2963 if (nis) { 2964 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 2965 ptr_idxs_is += olengths_idxs_is[i]; 2966 } 2967 } 2968 for (i=0;i<n_sends;i++) { 2969 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2970 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2971 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2972 if (nis) { 2973 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 2974 } 2975 } 2976 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2977 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2978 2979 /* assemble new l2g map */ 2980 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2981 ptr_idxs = recv_buffer_idxs; 2982 buf_size_idxs = 0; 2983 for (i=0;i<n_recvs;i++) { 2984 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2985 ptr_idxs += olengths_idxs[i]; 2986 } 2987 ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr); 2988 ptr_idxs = recv_buffer_idxs; 2989 buf_size_idxs = 0; 2990 for (i=0;i<n_recvs;i++) { 2991 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2992 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2993 ptr_idxs += olengths_idxs[i]; 2994 } 2995 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2996 ierr = ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2997 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2998 2999 /* infer new local matrix type from received local matrices type */ 3000 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3001 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3002 if (n_recvs) { 3003 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3004 ptr_idxs = recv_buffer_idxs; 3005 for (i=0;i<n_recvs;i++) { 3006 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3007 new_local_type_private = MATAIJ_PRIVATE; 3008 break; 3009 } 3010 ptr_idxs += olengths_idxs[i]; 3011 } 3012 switch (new_local_type_private) { 3013 case MATDENSE_PRIVATE: 3014 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3015 new_local_type = MATSEQAIJ; 3016 bs = 1; 3017 } else { /* if I receive only 1 dense matrix */ 3018 new_local_type = MATSEQDENSE; 3019 bs = 1; 3020 } 3021 break; 3022 case MATAIJ_PRIVATE: 3023 new_local_type = MATSEQAIJ; 3024 bs = 1; 3025 break; 3026 case MATBAIJ_PRIVATE: 3027 new_local_type = MATSEQBAIJ; 3028 break; 3029 case MATSBAIJ_PRIVATE: 3030 new_local_type = MATSEQSBAIJ; 3031 break; 3032 default: 3033 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3034 break; 3035 } 3036 } else { /* by default, new_local_type is seqdense */ 3037 new_local_type = MATSEQDENSE; 3038 bs = 1; 3039 } 3040 3041 /* create MATIS object if needed */ 3042 if (reuse == MAT_INITIAL_MATRIX) { 3043 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3044 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3045 } else { 3046 /* it also destroys the local matrices */ 3047 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3048 } 3049 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3050 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3051 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3052 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3053 3054 /* set values */ 3055 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3056 ptr_vals = recv_buffer_vals; 3057 ptr_idxs = recv_buffer_idxs; 3058 for (i=0;i<n_recvs;i++) { 3059 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3060 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3061 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3062 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3063 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3064 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3065 } else { 3066 /* TODO */ 3067 } 3068 ptr_idxs += olengths_idxs[i]; 3069 ptr_vals += olengths_vals[i]; 3070 } 3071 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3072 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3073 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3074 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3075 3076 #if 0 3077 if (!restrict_comm) { /* check */ 3078 Vec lvec,rvec; 3079 PetscReal infty_error; 3080 3081 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3082 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3083 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3084 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3085 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3086 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3087 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3088 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3089 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3090 } 3091 #endif 3092 3093 /* assemble new additional is (if any) */ 3094 if (nis) { 3095 PetscInt **temp_idxs,*count_is,j,psum; 3096 3097 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3098 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3099 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3100 ptr_idxs = recv_buffer_idxs_is; 3101 psum = 0; 3102 for (i=0;i<n_recvs;i++) { 3103 for (j=0;j<nis;j++) { 3104 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3105 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3106 psum += plen; 3107 ptr_idxs += plen+1; /* shift pointer to received data */ 3108 } 3109 } 3110 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3111 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3112 for (i=1;i<nis;i++) { 3113 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3114 } 3115 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3116 ptr_idxs = recv_buffer_idxs_is; 3117 for (i=0;i<n_recvs;i++) { 3118 for (j=0;j<nis;j++) { 3119 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3120 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3121 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3122 ptr_idxs += plen+1; /* shift pointer to received data */ 3123 } 3124 } 3125 for (i=0;i<nis;i++) { 3126 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3127 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3128 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3129 } 3130 ierr = PetscFree(count_is);CHKERRQ(ierr); 3131 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3132 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3133 } 3134 /* free workspace */ 3135 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3136 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3137 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3138 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3139 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3140 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3141 if (isdense) { 3142 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3143 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3144 } else { 3145 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3146 } 3147 if (nis) { 3148 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3149 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3150 } 3151 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3152 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3153 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3154 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3155 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3156 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3157 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3158 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3159 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3160 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3161 ierr = PetscFree(onodes);CHKERRQ(ierr); 3162 if (nis) { 3163 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3164 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3165 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3166 } 3167 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3168 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3169 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3170 for (i=0;i<nis;i++) { 3171 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3172 } 3173 } 3174 PetscFunctionReturn(0); 3175 } 3176 3177 /* temporary hack into ksp private data structure */ 3178 #include <petsc-private/kspimpl.h> 3179 3180 #undef __FUNCT__ 3181 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3182 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3183 { 3184 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3185 PC_IS *pcis = (PC_IS*)pc->data; 3186 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3187 MatNullSpace CoarseNullSpace=NULL; 3188 ISLocalToGlobalMapping coarse_islg; 3189 IS coarse_is,*isarray; 3190 PetscInt i,im_active=-1,active_procs=-1; 3191 PetscInt nis,nisdofs,nisneu; 3192 PC pc_temp; 3193 PCType coarse_pc_type; 3194 KSPType coarse_ksp_type; 3195 PetscBool multilevel_requested,multilevel_allowed; 3196 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3197 Mat t_coarse_mat_is; 3198 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3199 PetscMPIInt all_procs; 3200 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3201 PetscBool compute_vecs = PETSC_FALSE; 3202 PetscErrorCode ierr; 3203 3204 PetscFunctionBegin; 3205 /* Assign global numbering to coarse dofs */ 3206 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3207 compute_vecs = PETSC_TRUE; 3208 PetscInt ocoarse_size; 3209 ocoarse_size = pcbddc->coarse_size; 3210 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3211 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3212 /* see if we can avoid some work */ 3213 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3214 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3215 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3216 coarse_reuse = PETSC_FALSE; 3217 } else { /* we can safely reuse already computed coarse matrix */ 3218 coarse_reuse = PETSC_TRUE; 3219 } 3220 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3221 coarse_reuse = PETSC_FALSE; 3222 } 3223 /* reset any subassembling information */ 3224 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3225 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3226 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3227 coarse_reuse = PETSC_TRUE; 3228 } 3229 3230 /* count "active" (i.e. with positive local size) and "void" processes */ 3231 im_active = !!(pcis->n); 3232 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3233 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3234 void_procs = all_procs-active_procs; 3235 csin_type_simple = PETSC_TRUE; 3236 if (pcbddc->current_level) { 3237 csin_ml = PETSC_TRUE; 3238 ncoarse_ml = void_procs; 3239 csin_ds = PETSC_TRUE; 3240 ncoarse_ds = void_procs; 3241 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3242 } else { 3243 csin_ml = PETSC_FALSE; 3244 ncoarse_ml = all_procs; 3245 if (void_procs) { 3246 csin_ds = PETSC_TRUE; 3247 ncoarse_ds = void_procs; 3248 csin_type_simple = PETSC_FALSE; 3249 } else { 3250 csin_ds = PETSC_FALSE; 3251 ncoarse_ds = all_procs; 3252 } 3253 } 3254 3255 /* 3256 test if we can go multilevel: three conditions must be satisfied: 3257 - we have not exceeded the number of levels requested 3258 - we can actually subassemble the active processes 3259 - we can find a suitable number of MPI processes where we can place the subassembled problem 3260 */ 3261 multilevel_allowed = PETSC_FALSE; 3262 multilevel_requested = PETSC_FALSE; 3263 if (pcbddc->current_level < pcbddc->max_levels) { 3264 multilevel_requested = PETSC_TRUE; 3265 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3266 multilevel_allowed = PETSC_FALSE; 3267 } else { 3268 multilevel_allowed = PETSC_TRUE; 3269 } 3270 } 3271 /* determine number of process partecipating to coarse solver */ 3272 if (multilevel_allowed) { 3273 ncoarse = ncoarse_ml; 3274 csin = csin_ml; 3275 } else { 3276 ncoarse = ncoarse_ds; 3277 csin = csin_ds; 3278 } 3279 3280 /* creates temporary l2gmap and IS for coarse indexes */ 3281 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3282 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3283 3284 /* creates temporary MATIS object for coarse matrix */ 3285 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3286 #if 0 3287 { 3288 PetscViewer viewer; 3289 char filename[256]; 3290 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3291 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3292 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3293 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3294 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3295 } 3296 #endif 3297 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3298 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3299 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3300 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3301 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3302 3303 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3304 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3305 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3306 const PetscInt *idxs; 3307 ISLocalToGlobalMapping tmap; 3308 3309 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3310 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3311 /* allocate space for temporary storage */ 3312 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3313 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3314 /* allocate for IS array */ 3315 nisdofs = pcbddc->n_ISForDofsLocal; 3316 nisneu = !!pcbddc->NeumannBoundariesLocal; 3317 nis = nisdofs + nisneu; 3318 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3319 /* dofs splitting */ 3320 for (i=0;i<nisdofs;i++) { 3321 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3322 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3323 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3324 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3325 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3326 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3327 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3328 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3329 } 3330 /* neumann boundaries */ 3331 if (pcbddc->NeumannBoundariesLocal) { 3332 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3333 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3334 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3335 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3336 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3337 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3338 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3339 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3340 } 3341 /* free memory */ 3342 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3343 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3344 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3345 } else { 3346 nis = 0; 3347 nisdofs = 0; 3348 nisneu = 0; 3349 isarray = NULL; 3350 } 3351 /* destroy no longer needed map */ 3352 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3353 3354 /* restrict on coarse candidates (if needed) */ 3355 coarse_mat_is = NULL; 3356 if (csin) { 3357 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3358 PetscInt j,tissize,*nisindices; 3359 PetscInt *coarse_candidates; 3360 const PetscInt* tisindices; 3361 /* get coarse candidates' ranks in pc communicator */ 3362 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3363 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3364 for (i=0,j=0;i<all_procs;i++) { 3365 if (!coarse_candidates[i]) { 3366 coarse_candidates[j]=i; 3367 j++; 3368 } 3369 } 3370 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3371 /* get a suitable subassembling pattern */ 3372 if (csin_type_simple) { 3373 PetscMPIInt rank; 3374 PetscInt issize,isidx; 3375 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3376 if (im_active) { 3377 issize = 1; 3378 isidx = (PetscInt)rank; 3379 } else { 3380 issize = 0; 3381 isidx = -1; 3382 } 3383 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3384 } else { 3385 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3386 } 3387 if (pcbddc->dbg_flag) { 3388 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3389 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3390 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3391 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3392 for (i=0;i<j;i++) { 3393 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3394 } 3395 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3396 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3397 } 3398 /* shift the pattern on coarse candidates */ 3399 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3400 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3401 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3402 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3403 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3404 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3405 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3406 } 3407 if (pcbddc->dbg_flag) { 3408 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3409 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3410 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3411 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3412 } 3413 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3414 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3415 } else { 3416 if (pcbddc->dbg_flag) { 3417 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3418 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3419 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3420 } 3421 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3422 coarse_mat_is = t_coarse_mat_is; 3423 } 3424 3425 /* create local to global scatters for coarse problem */ 3426 if (compute_vecs) { 3427 PetscInt lrows; 3428 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3429 if (coarse_mat_is) { 3430 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3431 } else { 3432 lrows = 0; 3433 } 3434 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3435 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3436 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3437 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3438 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3439 } 3440 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3441 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3442 3443 /* set defaults for coarse KSP and PC */ 3444 if (multilevel_allowed) { 3445 coarse_ksp_type = KSPRICHARDSON; 3446 coarse_pc_type = PCBDDC; 3447 } else { 3448 coarse_ksp_type = KSPPREONLY; 3449 coarse_pc_type = PCREDUNDANT; 3450 } 3451 3452 /* print some info if requested */ 3453 if (pcbddc->dbg_flag) { 3454 if (!multilevel_allowed) { 3455 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3456 if (multilevel_requested) { 3457 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3458 } else if (pcbddc->max_levels) { 3459 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3460 } 3461 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3462 } 3463 } 3464 3465 /* create the coarse KSP object only once with defaults */ 3466 if (coarse_mat_is) { 3467 MatReuse coarse_mat_reuse; 3468 PetscViewer dbg_viewer = NULL; 3469 if (pcbddc->dbg_flag) { 3470 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3471 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3472 } 3473 if (!pcbddc->coarse_ksp) { 3474 char prefix[256],str_level[16]; 3475 size_t len; 3476 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3477 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3478 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3479 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3480 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3481 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3482 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3483 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3484 /* prefix */ 3485 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3486 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3487 if (!pcbddc->current_level) { 3488 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3489 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3490 } else { 3491 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3492 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3493 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3494 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3495 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3496 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3497 } 3498 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3499 /* allow user customization */ 3500 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3501 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3502 } 3503 3504 /* get some info after set from options */ 3505 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3506 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3507 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3508 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3509 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3510 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3511 isbddc = PETSC_FALSE; 3512 } 3513 if (isredundant) { 3514 KSP inner_ksp; 3515 PC inner_pc; 3516 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3517 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3518 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3519 } 3520 3521 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3522 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3523 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3524 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3525 if (nisdofs) { 3526 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3527 for (i=0;i<nisdofs;i++) { 3528 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3529 } 3530 } 3531 if (nisneu) { 3532 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3533 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3534 } 3535 3536 /* assemble coarse matrix */ 3537 if (coarse_reuse) { 3538 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3539 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3540 coarse_mat_reuse = MAT_REUSE_MATRIX; 3541 } else { 3542 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3543 } 3544 if (isbddc || isnn) { 3545 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3546 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3547 if (pcbddc->dbg_flag) { 3548 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3549 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3550 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3551 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3552 } 3553 } 3554 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3555 } else { 3556 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3557 } 3558 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3559 3560 /* propagate symmetry info to coarse matrix */ 3561 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3562 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3563 3564 /* set operators */ 3565 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3566 if (pcbddc->dbg_flag) { 3567 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3568 } 3569 } else { /* processes non partecipating to coarse solver (if any) */ 3570 coarse_mat = 0; 3571 } 3572 ierr = PetscFree(isarray);CHKERRQ(ierr); 3573 #if 0 3574 { 3575 PetscViewer viewer; 3576 char filename[256]; 3577 sprintf(filename,"coarse_mat.m"); 3578 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3579 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3580 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3581 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3582 } 3583 #endif 3584 3585 /* Compute coarse null space (special handling by BDDC only) */ 3586 if (pcbddc->NullSpace) { 3587 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3588 } 3589 3590 if (pcbddc->coarse_ksp) { 3591 Vec crhs,csol; 3592 PetscBool ispreonly; 3593 if (CoarseNullSpace) { 3594 if (isbddc) { 3595 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3596 } else { 3597 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3598 } 3599 } 3600 /* setup coarse ksp */ 3601 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3602 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3603 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3604 /* hack */ 3605 if (!csol) { 3606 ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3607 } 3608 if (!crhs) { 3609 ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3610 } 3611 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3612 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3613 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3614 KSP check_ksp; 3615 KSPType check_ksp_type; 3616 PC check_pc; 3617 Vec check_vec,coarse_vec; 3618 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3619 PetscInt its; 3620 PetscBool compute_eigs; 3621 PetscReal *eigs_r,*eigs_c; 3622 PetscInt neigs; 3623 const char *prefix; 3624 3625 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3626 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3627 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3628 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3629 if (ispreonly) { 3630 check_ksp_type = KSPPREONLY; 3631 compute_eigs = PETSC_FALSE; 3632 } else { 3633 check_ksp_type = KSPGMRES; 3634 compute_eigs = PETSC_TRUE; 3635 } 3636 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3637 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3638 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3639 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3640 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3641 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3642 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3643 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3644 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3645 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3646 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3647 /* create random vec */ 3648 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3649 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3650 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3651 if (CoarseNullSpace) { 3652 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3653 } 3654 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3655 /* solve coarse problem */ 3656 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3657 if (CoarseNullSpace) { 3658 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3659 } 3660 /* set eigenvalue estimation if preonly has not been requested */ 3661 if (compute_eigs) { 3662 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3663 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3664 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3665 lambda_max = eigs_r[neigs-1]; 3666 lambda_min = eigs_r[0]; 3667 if (pcbddc->use_coarse_estimates) { 3668 if (lambda_max>lambda_min) { 3669 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3670 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3671 } 3672 } 3673 } 3674 3675 /* check coarse problem residual error */ 3676 if (pcbddc->dbg_flag) { 3677 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3678 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3679 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3680 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3681 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3682 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3683 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3684 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3685 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3686 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3687 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3688 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3689 if (compute_eigs) { 3690 PetscReal lambda_max_s,lambda_min_s; 3691 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3692 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3693 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3694 for (i=0;i<neigs;i++) { 3695 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3696 } 3697 } 3698 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3699 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3700 } 3701 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3702 if (compute_eigs) { 3703 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3704 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3705 } 3706 } 3707 } 3708 /* print additional info */ 3709 if (pcbddc->dbg_flag) { 3710 /* waits until all processes reaches this point */ 3711 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3712 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3713 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3714 } 3715 3716 /* free memory */ 3717 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3718 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3719 PetscFunctionReturn(0); 3720 } 3721 3722 #undef __FUNCT__ 3723 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3724 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3725 { 3726 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3727 PC_IS* pcis = (PC_IS*)pc->data; 3728 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3729 PetscInt i,coarse_size; 3730 PetscInt *local_primal_indices; 3731 PetscErrorCode ierr; 3732 3733 PetscFunctionBegin; 3734 /* Compute global number of coarse dofs */ 3735 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3736 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3737 } 3738 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3739 3740 /* check numbering */ 3741 if (pcbddc->dbg_flag) { 3742 PetscScalar coarsesum,*array; 3743 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 3744 3745 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3746 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3747 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3748 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3749 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3750 for (i=0;i<pcbddc->local_primal_size;i++) { 3751 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3752 } 3753 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3754 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3755 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3756 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3757 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3758 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3759 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3760 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3761 for (i=0;i<pcis->n;i++) { 3762 if (array[i] == 1.0) { 3763 set_error = PETSC_TRUE; 3764 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3765 } 3766 } 3767 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3768 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3769 for (i=0;i<pcis->n;i++) { 3770 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3771 } 3772 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3773 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3774 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3775 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3776 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3777 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3778 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 3779 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3780 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3781 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3782 for (i=0;i<pcbddc->local_primal_size;i++) { 3783 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3784 } 3785 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3786 } 3787 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3788 if (set_error_reduced) { 3789 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 3790 } 3791 } 3792 /* get back data */ 3793 *coarse_size_n = coarse_size; 3794 *local_primal_indices_n = local_primal_indices; 3795 PetscFunctionReturn(0); 3796 } 3797 3798 #undef __FUNCT__ 3799 #define __FUNCT__ "PCBDDCGlobalToLocal" 3800 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 3801 { 3802 IS localis_t; 3803 PetscInt i,lsize,*idxs,n; 3804 PetscScalar *vals; 3805 PetscErrorCode ierr; 3806 3807 PetscFunctionBegin; 3808 /* get indices in local ordering exploiting local to global map */ 3809 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3810 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3811 for (i=0;i<lsize;i++) vals[i] = 1.0; 3812 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3813 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 3814 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 3815 if (idxs) { /* multilevel guard */ 3816 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3817 } 3818 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 3819 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3820 ierr = PetscFree(vals);CHKERRQ(ierr); 3821 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 3822 /* now compute set in local ordering */ 3823 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3824 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3825 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 3826 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 3827 for (i=0,lsize=0;i<n;i++) { 3828 if (PetscRealPart(vals[i]) > 0.5) { 3829 lsize++; 3830 } 3831 } 3832 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3833 for (i=0,lsize=0;i<n;i++) { 3834 if (PetscRealPart(vals[i]) > 0.5) { 3835 idxs[lsize++] = i; 3836 } 3837 } 3838 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 3839 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3840 *localis = localis_t; 3841 PetscFunctionReturn(0); 3842 } 3843