1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCSetUpSolvers" 10 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscScalar *coarse_submat_vals; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 /* Compute matrix after change of basis and extract local submatrices */ 18 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D */ 21 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 22 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 23 24 /* Setup local solvers ksp_D and ksp_R */ 25 /* PCBDDCSetUpLocalScatters should be called first! */ 26 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 27 28 /* Change global null space passed in by the user if change of basis has been requested */ 29 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 30 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 31 } 32 33 /* 34 Setup local correction and local part of coarse basis. 35 Gives back the dense local part of the coarse matrix in column major ordering 36 */ 37 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 38 39 /* Compute total number of coarse nodes and setup coarse solver */ 40 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 41 42 /* free */ 43 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 #undef __FUNCT__ 48 #define __FUNCT__ "PCBDDCResetCustomization" 49 PetscErrorCode PCBDDCResetCustomization(PC pc) 50 { 51 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 57 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 61 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 62 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 63 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 64 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 65 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCResetTopography" 71 PetscErrorCode PCBDDCResetTopography(PC pc) 72 { 73 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 78 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 79 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 80 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 81 PetscFunctionReturn(0); 82 } 83 84 #undef __FUNCT__ 85 #define __FUNCT__ "PCBDDCResetSolvers" 86 PetscErrorCode PCBDDCResetSolvers(PC pc) 87 { 88 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 93 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 100 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 101 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 102 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 104 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 109 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 110 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 111 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 112 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 113 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 114 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 115 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 121 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 122 { 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PC_IS *pcis = (PC_IS*)pc->data; 125 VecType impVecType; 126 PetscInt n_constraints,n_R,old_size; 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 if (!pcbddc->ConstraintMatrix) { 131 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 132 } 133 /* get sizes */ 134 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 135 n_R = pcis->n-pcbddc->n_actual_vertices; 136 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 137 /* local work vectors (try to avoid unneeded work)*/ 138 /* R nodes */ 139 old_size = -1; 140 if (pcbddc->vec1_R) { 141 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 142 } 143 if (n_R != old_size) { 144 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 146 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 147 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 148 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 149 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 150 } 151 /* local primal dofs */ 152 old_size = -1; 153 if (pcbddc->vec1_P) { 154 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 155 } 156 if (pcbddc->local_primal_size != old_size) { 157 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 158 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 159 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 160 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 161 } 162 /* local explicit constraints */ 163 old_size = -1; 164 if (pcbddc->vec1_C) { 165 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 166 } 167 if (n_constraints && n_constraints != old_size) { 168 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 169 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 170 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 171 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 #undef __FUNCT__ 177 #define __FUNCT__ "PCBDDCSetUpCorrection" 178 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 179 { 180 PetscErrorCode ierr; 181 /* pointers to pcis and pcbddc */ 182 PC_IS* pcis = (PC_IS*)pc->data; 183 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 184 /* submatrices of local problem */ 185 Mat A_RV,A_VR,A_VV; 186 /* working matrices */ 187 Mat M1,M2,M3,C_CR; 188 /* working vectors */ 189 Vec vec1_C,vec2_C,vec1_V,vec2_V; 190 /* additional working stuff */ 191 IS is_aux; 192 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 193 const PetscScalar *array,*row_cmat_values; 194 const PetscInt *row_cmat_indices,*idx_R_local; 195 PetscInt *idx_V_B,*auxindices; 196 PetscInt n_vertices,n_constraints,size_of_constraint; 197 PetscInt i,j,n_R,n_D,n_B; 198 PetscBool unsymmetric_check; 199 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 200 MatType impMatType; 201 /* some shortcuts to scalars */ 202 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 203 /* for debugging purposes */ 204 PetscReal *coarsefunctions_errors,*constraints_errors; 205 206 PetscFunctionBegin; 207 /* get number of vertices (corners plus constraints with change of basis) 208 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 209 n_vertices = pcbddc->n_actual_vertices; 210 n_constraints = pcbddc->local_primal_size-n_vertices; 211 /* Set Non-overlapping dimensions */ 212 n_B = pcis->n_B; n_D = pcis->n - n_B; 213 n_R = pcis->n-n_vertices; 214 215 /* Set types for local objects needed by BDDC precondtioner */ 216 impMatType = MATSEQDENSE; 217 218 /* Allocating some extra storage just to be safe */ 219 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 220 for (i=0;i<pcis->n;i++) auxindices[i]=i; 221 222 /* vertices in boundary numbering */ 223 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 224 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 225 if (i != n_vertices) { 226 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 227 } 228 229 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 230 if (n_constraints) { 231 /* see if we can save some allocations */ 232 if (pcbddc->local_auxmat2) { 233 PetscInt on_R,on_constraints; 234 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 235 if (on_R != n_R || on_constraints != n_constraints) { 236 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 237 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 238 } 239 } 240 /* work vectors */ 241 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 242 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 243 /* auxiliary matrices */ 244 if (!pcbddc->local_auxmat2) { 245 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 246 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 247 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 249 } 250 251 /* Extract constraints on R nodes: C_{CR} */ 252 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 253 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 254 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 255 256 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 257 for (i=0;i<n_constraints;i++) { 258 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 259 /* Get row of constraint matrix in R numbering */ 260 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 261 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 262 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 263 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 264 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 265 /* Solve for row of constraint matrix in R numbering */ 266 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 267 /* Set values in local_auxmat2 */ 268 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 269 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 270 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 271 } 272 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 273 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 274 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 275 276 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 277 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 278 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 279 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 280 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 281 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 282 ierr = MatSetUp(M1);CHKERRQ(ierr); 283 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 284 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 285 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 286 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 287 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 288 ierr = MatDestroy(&M2);CHKERRQ(ierr); 289 ierr = MatDestroy(&M3);CHKERRQ(ierr); 290 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 291 if (!pcbddc->local_auxmat1) { 292 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 293 } else { 294 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 295 } 296 } 297 298 /* Get submatrices from subdomain matrix */ 299 if (n_vertices) { 300 PetscInt ibs,mbs; 301 PetscBool issbaij; 302 Mat newmat; 303 304 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 305 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 306 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 307 if (ibs != mbs) { /* need to convert to SEQAIJ */ 308 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 310 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 311 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 313 } else { 314 /* this is safe */ 315 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 316 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 317 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 318 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 319 /* which of the two approaches is faster? */ 320 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 321 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 322 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 324 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 325 } else { 326 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 327 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 328 } 329 } 330 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 331 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 332 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 333 } 334 335 /* Matrix of coarse basis functions (local) */ 336 if (pcbddc->coarse_phi_B) { 337 PetscInt on_B,on_primal; 338 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 339 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 340 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 341 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 342 } 343 } 344 if (pcbddc->coarse_phi_D) { 345 PetscInt on_D,on_primal; 346 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 347 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 348 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 349 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 350 } 351 } 352 if (!pcbddc->coarse_phi_B) { 353 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 355 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 356 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 357 } 358 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 359 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 361 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 362 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 363 } 364 365 if (pcbddc->dbg_flag) { 366 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 367 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 368 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 369 } 370 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 371 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 372 373 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 374 375 /* vertices */ 376 for (i=0;i<n_vertices;i++) { 377 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 378 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 379 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 380 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 381 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 382 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 383 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 384 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 387 if (n_constraints) { 388 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 389 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 390 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 391 } 392 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 393 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 394 395 /* Set values in coarse basis function and subdomain part of coarse_mat */ 396 /* coarse basis functions */ 397 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 398 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 399 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 400 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 402 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 403 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 404 if (pcbddc->switch_static || pcbddc->dbg_flag) { 405 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 406 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 410 } 411 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 412 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 415 if (n_constraints) { 416 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 418 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 419 } 420 421 /* check */ 422 if (pcbddc->dbg_flag) { 423 /* assemble subdomain vector on local nodes */ 424 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 425 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 426 if (n_R) { 427 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 428 } 429 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 430 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 432 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 433 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 434 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 435 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 436 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 437 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 438 if (n_constraints) { 439 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 440 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 441 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 442 } 443 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 444 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 445 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 446 /* check saddle point solution */ 447 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 448 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 449 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 450 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 451 /* shift by the identity matrix */ 452 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 453 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 455 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 456 } 457 } 458 459 /* constraints */ 460 for (i=0;i<n_constraints;i++) { 461 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 462 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 463 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 464 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 465 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 466 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 467 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 468 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 469 if (n_vertices) { 470 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 471 } 472 /* Set values in coarse basis function and subdomain part of coarse_mat */ 473 /* coarse basis functions */ 474 j = i+n_vertices; /* don't touch this! */ 475 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 476 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 479 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 480 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 481 if (pcbddc->switch_static || pcbddc->dbg_flag) { 482 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 483 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 484 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 485 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 487 } 488 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 489 if (n_vertices) { 490 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 491 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 492 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 493 } 494 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 495 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 496 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 497 498 if (pcbddc->dbg_flag) { 499 /* assemble subdomain vector on nodes */ 500 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 501 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 502 if (n_R) { 503 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 504 } 505 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 506 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 507 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 508 /* assemble subdomain vector of lagrange multipliers */ 509 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 510 if (n_vertices) { 511 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 512 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 514 } 515 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 516 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 517 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 518 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 519 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 520 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 521 /* check saddle point solution */ 522 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 523 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 524 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 525 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 526 /* shift by the identity matrix */ 527 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 528 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 529 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 530 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 531 } 532 } 533 /* call assembling routines for local coarse basis */ 534 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 535 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 536 if (pcbddc->switch_static || pcbddc->dbg_flag) { 537 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 538 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 539 } 540 541 /* compute other basis functions for non-symmetric problems */ 542 /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/ 543 { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */ 544 PetscBool setsym; 545 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr); 546 if (!setsym) pcbddc->issym = PETSC_FALSE; 547 } 548 549 if (!pcbddc->issym) { 550 if (!pcbddc->coarse_psi_B) { 551 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 552 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 553 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 554 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 555 } 556 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 557 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 558 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 559 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 560 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 561 } 562 for (i=0;i<pcbddc->local_primal_size;i++) { 563 if (n_constraints) { 564 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 565 for (j=0;j<n_constraints;j++) { 566 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 567 } 568 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 569 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 570 } 571 if (i<n_vertices) { 572 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 573 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 574 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 575 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 576 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 577 if (n_constraints) { 578 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 579 } 580 } else { 581 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 582 } 583 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 584 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 585 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 586 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 587 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 588 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 589 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 590 if (i<n_vertices) { 591 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 592 } 593 if (pcbddc->switch_static || pcbddc->dbg_flag) { 594 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 595 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 596 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 597 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 598 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 599 } 600 601 if (pcbddc->dbg_flag) { 602 /* assemble subdomain vector on nodes */ 603 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 604 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 605 if (n_R) { 606 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 607 } 608 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 609 if (i<n_vertices) { 610 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 611 } 612 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 614 /* assemble subdomain vector of lagrange multipliers */ 615 for (j=0;j<pcbddc->local_primal_size;j++) { 616 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 617 } 618 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 619 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 620 /* check saddle point solution */ 621 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 622 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 623 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 624 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 625 /* shift by the identity matrix */ 626 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 627 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 628 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 629 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 630 } 631 } 632 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 633 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 634 if (pcbddc->switch_static || pcbddc->dbg_flag) { 635 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 636 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 637 } 638 unsymmetric_check = PETSC_TRUE; 639 } else { /* take references to already computed coarse basis */ 640 unsymmetric_check = PETSC_FALSE; 641 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 642 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 643 if (pcbddc->coarse_phi_D) { 644 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 645 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 646 } 647 } 648 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 649 /* Checking coarse_sub_mat and coarse basis functios */ 650 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 651 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 652 if (pcbddc->dbg_flag) { 653 Mat coarse_sub_mat; 654 Mat AUXMAT,TM1,TM2,TM3,TM4; 655 Mat coarse_phi_D,coarse_phi_B; 656 Mat coarse_psi_D,coarse_psi_B; 657 Mat A_II,A_BB,A_IB,A_BI; 658 MatType checkmattype=MATSEQAIJ; 659 PetscReal real_value; 660 661 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 662 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 663 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 664 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 665 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 666 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 667 if (unsymmetric_check) { 668 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 669 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 670 } 671 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 672 673 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 674 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 676 if (unsymmetric_check) { 677 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 684 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 685 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 686 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 687 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 688 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 689 } else { 690 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 691 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 692 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 693 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 694 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 695 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 696 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 697 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 698 } 699 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 700 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 701 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 702 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 703 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 704 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 705 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 710 for (i=0;i<pcbddc->local_primal_size;i++) { 711 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 712 } 713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 714 for (i=0;i<pcbddc->local_primal_size;i++) { 715 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 716 } 717 if (unsymmetric_check) { 718 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 719 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 720 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 721 } 722 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 723 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 724 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 725 } 726 } 727 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 728 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 729 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 730 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 731 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 732 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 733 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 734 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 735 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 736 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 737 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 738 if (unsymmetric_check) { 739 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 740 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 741 } 742 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 743 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 744 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 745 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 746 } 747 /* free memory */ 748 if (n_vertices) { 749 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 750 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 751 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 752 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 753 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 754 } 755 if (n_constraints) { 756 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 757 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 758 ierr = MatDestroy(&M1);CHKERRQ(ierr); 759 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 760 } 761 ierr = PetscFree(auxindices);CHKERRQ(ierr); 762 /* get back data */ 763 *coarse_submat_vals_n = coarse_submat_vals; 764 PetscFunctionReturn(0); 765 } 766 767 #undef __FUNCT__ 768 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 769 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 770 { 771 PC_IS* pcis = (PC_IS*)(pc->data); 772 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 773 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 774 PetscBool issbaij,isseqaij; 775 /* manage repeated solves */ 776 MatReuse reuse; 777 PetscErrorCode ierr; 778 779 PetscFunctionBegin; 780 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 781 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 782 } 783 /* get mat flags */ 784 reuse = MAT_INITIAL_MATRIX; 785 if (pc->setupcalled) { 786 if (pc->flag == SAME_NONZERO_PATTERN) { 787 reuse = MAT_REUSE_MATRIX; 788 } else { 789 reuse = MAT_INITIAL_MATRIX; 790 } 791 } 792 if (reuse == MAT_INITIAL_MATRIX) { 793 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 794 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 795 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 796 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 797 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 798 } 799 800 /* transform local matrices if needed */ 801 if (pcbddc->ChangeOfBasisMatrix) { 802 Mat *change_mat_all; 803 IS is_local,is_global; 804 PetscBool sorted; 805 PetscInt *idxs_perm; 806 807 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr); 808 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 809 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 810 ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr); 811 if (!sorted) { 812 const PetscInt *idxs; 813 PetscInt *idxs_sorted,i; 814 815 ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr); 816 ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr); 817 for (i=0;i<pcis->n;i++) { 818 idxs_perm[i] = i; 819 } 820 ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr); 821 ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr); 822 for (i=0;i<pcis->n;i++) { 823 idxs_sorted[i] = idxs[idxs_perm[i]]; 824 } 825 ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr); 826 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 827 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr); 828 } 829 830 /* get change of basis on the whole set of local dofs */ 831 ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr); 832 833 if (!sorted) { 834 Mat new_mat; 835 IS is_perm; 836 PetscInt *idxs,i; 837 838 ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr); 839 for (i=0;i<pcis->n;i++) { 840 idxs[idxs_perm[i]] = i; 841 } 842 ierr = PetscFree(idxs_perm);CHKERRQ(ierr); 843 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr); 844 ierr = ISSetPermutation(is_perm);CHKERRQ(ierr); 845 ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr); 846 ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr); 847 change_mat_all[0] = new_mat; 848 ierr = ISDestroy(&is_perm);CHKERRQ(ierr); 849 } 850 851 /* check */ 852 if (pcbddc->dbg_flag) { 853 Vec x,x_change; 854 PetscReal error; 855 856 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 857 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 858 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 859 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 860 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 861 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 863 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 864 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 865 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 866 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 867 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 868 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 869 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 870 ierr = VecDestroy(&x);CHKERRQ(ierr); 871 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 872 } 873 874 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 875 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 876 if (isseqaij) { 877 ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 878 } else { 879 Mat work_mat; 880 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 881 ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 882 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 883 } 884 /* 885 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 886 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 887 */ 888 ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr); 889 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 890 } else { 891 /* without change of basis, the local matrix is unchanged */ 892 if (!pcbddc->local_mat) { 893 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 894 pcbddc->local_mat = matis->A; 895 } 896 } 897 898 /* get submatrices */ 899 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 900 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 901 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 902 if (!issbaij) { 903 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 904 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 905 } else { 906 Mat newmat; 907 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 908 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 909 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 910 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 911 } 912 PetscFunctionReturn(0); 913 } 914 915 #undef __FUNCT__ 916 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 917 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 918 { 919 PC_IS* pcis = (PC_IS*)(pc->data); 920 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 921 IS is_aux1,is_aux2; 922 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 923 PetscInt n_vertices,i,j,n_R,n_D,n_B; 924 PetscInt vbs,bs; 925 PetscBT bitmask; 926 PetscErrorCode ierr; 927 928 PetscFunctionBegin; 929 /* 930 No need to setup local scatters if 931 - primal space is unchanged 932 AND 933 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 934 AND 935 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 936 */ 937 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 938 PetscFunctionReturn(0); 939 } 940 /* destroy old objects */ 941 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 942 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 943 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 944 /* Set Non-overlapping dimensions */ 945 n_B = pcis->n_B; n_D = pcis->n - n_B; 946 n_vertices = pcbddc->n_actual_vertices; 947 /* create auxiliary bitmask */ 948 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 949 for (i=0;i<n_vertices;i++) { 950 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 951 } 952 953 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 954 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 955 for (i=0, n_R=0; i<pcis->n; i++) { 956 if (!PetscBTLookup(bitmask,i)) { 957 idx_R_local[n_R] = i; 958 n_R++; 959 } 960 } 961 962 /* Block code */ 963 vbs = 1; 964 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 965 if (bs>1 && !(n_vertices%bs)) { 966 PetscBool is_blocked = PETSC_TRUE; 967 PetscInt *vary; 968 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 969 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 970 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 971 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 972 for (i=0; i<n_vertices; i++) { 973 if (vary[i]!=0 && vary[i]!=bs) { 974 is_blocked = PETSC_FALSE; 975 break; 976 } 977 } 978 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 979 vbs = bs; 980 for (i=0;i<n_R/vbs;i++) { 981 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 982 } 983 } 984 ierr = PetscFree(vary);CHKERRQ(ierr); 985 } 986 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 987 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 988 989 /* print some info if requested */ 990 if (pcbddc->dbg_flag) { 991 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 993 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 995 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 996 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 999 } 1000 1001 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1002 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1003 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1004 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1005 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1006 for (i=0; i<n_D; i++) { 1007 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1008 } 1009 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1010 for (i=0, j=0; i<n_R; i++) { 1011 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1012 aux_array1[j++] = i; 1013 } 1014 } 1015 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1016 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1017 for (i=0, j=0; i<n_B; i++) { 1018 if (!PetscBTLookup(bitmask,is_indices[i])) { 1019 aux_array2[j++] = i; 1020 } 1021 } 1022 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1023 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1024 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1025 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1026 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1027 1028 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1029 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1030 for (i=0, j=0; i<n_R; i++) { 1031 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1032 aux_array1[j++] = i; 1033 } 1034 } 1035 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1036 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1037 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1038 } 1039 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1040 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1047 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1048 { 1049 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1050 PC_IS *pcis = (PC_IS*)pc->data; 1051 PC pc_temp; 1052 Mat A_RR; 1053 MatReuse reuse; 1054 PetscScalar m_one = -1.0; 1055 PetscReal value; 1056 PetscInt n_D,n_R,ibs,mbs; 1057 PetscBool use_exact,use_exact_reduced,issbaij; 1058 PetscErrorCode ierr; 1059 /* prefixes stuff */ 1060 char dir_prefix[256],neu_prefix[256],str_level[16]; 1061 size_t len; 1062 1063 PetscFunctionBegin; 1064 1065 /* compute prefixes */ 1066 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1067 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1068 if (!pcbddc->current_level) { 1069 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1070 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1071 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1072 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1073 } else { 1074 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1075 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1076 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1077 len -= 15; /* remove "pc_bddc_coarse_" */ 1078 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1079 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1080 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1081 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1082 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1083 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1084 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1085 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1086 } 1087 1088 /* DIRICHLET PROBLEM */ 1089 /* Matrix for Dirichlet problem is pcis->A_II */ 1090 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1091 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1092 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1093 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr); 1094 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1095 /* default */ 1096 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1097 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1098 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1099 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1100 if (issbaij) { 1101 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1102 } else { 1103 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1104 } 1105 /* Allow user's customization */ 1106 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1107 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1108 } 1109 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1110 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1111 if (!n_D) { 1112 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1113 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1114 } 1115 /* Set Up KSP for Dirichlet problem of BDDC */ 1116 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1117 /* set ksp_D into pcis data */ 1118 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1119 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1120 pcis->ksp_D = pcbddc->ksp_D; 1121 1122 /* NEUMANN PROBLEM */ 1123 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1124 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1125 if (pcbddc->ksp_R) { /* already created ksp */ 1126 PetscInt nn_R; 1127 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1128 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1129 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1130 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1131 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1132 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1133 reuse = MAT_INITIAL_MATRIX; 1134 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1135 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1136 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1137 reuse = MAT_INITIAL_MATRIX; 1138 } else { /* safe to reuse the matrix */ 1139 reuse = MAT_REUSE_MATRIX; 1140 } 1141 } 1142 /* last check */ 1143 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1144 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1145 reuse = MAT_INITIAL_MATRIX; 1146 } 1147 } else { /* first time, so we need to create the matrix */ 1148 reuse = MAT_INITIAL_MATRIX; 1149 } 1150 /* extract A_RR */ 1151 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1152 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1153 if (ibs != mbs) { 1154 Mat newmat; 1155 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1156 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1157 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1158 } else { 1159 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1160 } 1161 if (!pcbddc->ksp_R) { /* create object if not present */ 1162 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1163 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr); 1164 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1165 /* default */ 1166 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1167 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1168 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1169 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1170 if (issbaij) { 1171 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1172 } else { 1173 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1174 } 1175 /* Allow user's customization */ 1176 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1177 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1178 } 1179 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1180 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1181 if (!n_R) { 1182 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1183 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1184 } 1185 /* Set Up KSP for Neumann problem of BDDC */ 1186 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1187 1188 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1189 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1190 /* Dirichlet */ 1191 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1192 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1193 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1194 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1195 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1196 /* need to be adapted? */ 1197 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1198 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1199 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1200 /* print info */ 1201 if (pcbddc->dbg_flag) { 1202 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1203 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1204 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1205 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1206 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1207 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1208 } 1209 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1210 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1211 } 1212 1213 /* Neumann */ 1214 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1215 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1216 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1217 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1218 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1219 /* need to be adapted? */ 1220 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1221 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1222 /* print info */ 1223 if (pcbddc->dbg_flag) { 1224 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1225 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1226 } 1227 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1228 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1229 } 1230 } 1231 /* free Neumann problem's matrix */ 1232 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1238 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1239 { 1240 PetscErrorCode ierr; 1241 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1242 1243 PetscFunctionBegin; 1244 if (applytranspose) { 1245 if (pcbddc->local_auxmat1) { 1246 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1247 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1248 } 1249 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1250 } else { 1251 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1252 if (pcbddc->local_auxmat1) { 1253 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1254 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1255 } 1256 } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1263 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1264 { 1265 PetscErrorCode ierr; 1266 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1267 PC_IS* pcis = (PC_IS*) (pc->data); 1268 const PetscScalar zero = 0.0; 1269 1270 PetscFunctionBegin; 1271 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1272 if (applytranspose) { 1273 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1274 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1275 } else { 1276 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1277 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1278 } 1279 /* start communications from local primal nodes to rhs of coarse solver */ 1280 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1281 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1282 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1283 1284 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1285 /* TODO remove null space when doing multilevel */ 1286 if (pcbddc->coarse_ksp) { 1287 if (applytranspose) { 1288 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1289 } else { 1290 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1291 } 1292 } 1293 1294 /* Local solution on R nodes */ 1295 if (pcis->n) { 1296 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1297 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1298 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1299 if (pcbddc->switch_static) { 1300 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1301 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1302 } 1303 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1304 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1305 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1306 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1307 if (pcbddc->switch_static) { 1308 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1309 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1310 } 1311 } 1312 1313 /* communications from coarse sol to local primal nodes */ 1314 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1315 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1316 1317 /* Sum contributions from two levels */ 1318 if (applytranspose) { 1319 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1320 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1321 } else { 1322 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1323 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1324 } 1325 PetscFunctionReturn(0); 1326 } 1327 1328 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1331 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1332 { 1333 PetscErrorCode ierr; 1334 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1335 PetscScalar *array,*array2; 1336 Vec from,to; 1337 1338 PetscFunctionBegin; 1339 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1340 from = pcbddc->coarse_vec; 1341 to = pcbddc->vec1_P; 1342 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1343 Vec tvec; 1344 PetscInt lsize; 1345 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1346 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1347 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1348 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1349 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1350 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1351 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1352 } 1353 } else { /* from local to global -> put data in coarse right hand side */ 1354 from = pcbddc->vec1_P; 1355 to = pcbddc->coarse_vec; 1356 } 1357 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1358 PetscFunctionReturn(0); 1359 } 1360 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1363 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1364 { 1365 PetscErrorCode ierr; 1366 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1367 PetscScalar *array,*array2; 1368 Vec from,to; 1369 1370 PetscFunctionBegin; 1371 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1372 from = pcbddc->coarse_vec; 1373 to = pcbddc->vec1_P; 1374 } else { /* from local to global -> put data in coarse right hand side */ 1375 from = pcbddc->vec1_P; 1376 to = pcbddc->coarse_vec; 1377 } 1378 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1379 if (smode == SCATTER_FORWARD) { 1380 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1381 Vec tvec; 1382 PetscInt lsize; 1383 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1384 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1385 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1386 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1387 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1388 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1389 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1390 } 1391 } 1392 PetscFunctionReturn(0); 1393 } 1394 1395 /* uncomment for testing purposes */ 1396 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1397 #undef __FUNCT__ 1398 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1399 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1400 { 1401 PetscErrorCode ierr; 1402 PC_IS* pcis = (PC_IS*)(pc->data); 1403 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1404 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1405 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1406 MatType impMatType=MATSEQAIJ; 1407 /* one and zero */ 1408 PetscScalar one=1.0,zero=0.0; 1409 /* space to store constraints and their local indices */ 1410 PetscScalar *temp_quadrature_constraint; 1411 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1412 /* iterators */ 1413 PetscInt i,j,k,total_counts,temp_start_ptr; 1414 /* stuff to store connected components stored in pcbddc->mat_graph */ 1415 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1416 PetscInt n_ISForFaces,n_ISForEdges; 1417 /* near null space stuff */ 1418 MatNullSpace nearnullsp; 1419 const Vec *nearnullvecs; 1420 Vec *localnearnullsp; 1421 PetscBool nnsp_has_cnst; 1422 PetscInt nnsp_size; 1423 PetscScalar *array; 1424 /* BLAS integers */ 1425 PetscBLASInt lwork,lierr; 1426 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1427 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1428 /* LAPACK working arrays for SVD or POD */ 1429 PetscBool skip_lapack; 1430 PetscScalar *work; 1431 PetscReal *singular_vals; 1432 #if defined(PETSC_USE_COMPLEX) 1433 PetscReal *rwork; 1434 #endif 1435 #if defined(PETSC_MISSING_LAPACK_GESVD) 1436 PetscBLASInt Blas_one_2=1; 1437 PetscScalar *temp_basis,*correlation_mat; 1438 #else 1439 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1440 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1441 #endif 1442 /* reuse */ 1443 PetscInt olocal_primal_size; 1444 PetscInt *oprimal_indices_local_idxs; 1445 /* change of basis */ 1446 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1447 PetscBool boolforchange,qr_needed; 1448 PetscBT touched,change_basis,qr_needed_idx; 1449 /* auxiliary stuff */ 1450 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1451 PetscInt ncc,*gidxs,*permutation,*temp_indices_to_constraint_work; 1452 PetscScalar *temp_quadrature_constraint_work; 1453 /* some quantities */ 1454 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1455 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1456 1457 1458 PetscFunctionBegin; 1459 /* Destroy Mat objects computed previously */ 1460 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1461 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1462 /* Get index sets for faces, edges and vertices from graph */ 1463 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1464 /* free unneeded index sets */ 1465 if (!pcbddc->use_vertices) { 1466 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1467 } 1468 if (!pcbddc->use_edges) { 1469 for (i=0;i<n_ISForEdges;i++) { 1470 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1471 } 1472 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1473 n_ISForEdges = 0; 1474 } 1475 if (!pcbddc->use_faces) { 1476 for (i=0;i<n_ISForFaces;i++) { 1477 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1478 } 1479 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1480 n_ISForFaces = 0; 1481 } 1482 /* HACKS (the following two blocks of code) */ 1483 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1484 pcbddc->use_change_of_basis = PETSC_TRUE; 1485 if (!ISForEdges) { 1486 pcbddc->use_change_on_faces = PETSC_TRUE; 1487 } 1488 } 1489 if (pcbddc->NullSpace) { 1490 /* use_change_of_basis should be consistent among processors */ 1491 PetscBool tbool[2],gbool[2]; 1492 tbool [0] = pcbddc->use_change_of_basis; 1493 tbool [1] = pcbddc->use_change_on_faces; 1494 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1495 pcbddc->use_change_of_basis = gbool[0]; 1496 pcbddc->use_change_on_faces = gbool[1]; 1497 } 1498 /* print some info */ 1499 if (pcbddc->dbg_flag) { 1500 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1501 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1502 i = 0; 1503 if (ISForVertices) { 1504 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1505 } 1506 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1507 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1508 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1509 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1510 } 1511 /* check if near null space is attached to global mat */ 1512 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1513 if (nearnullsp) { 1514 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1515 /* remove any stored info */ 1516 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1517 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1518 /* store information for BDDC solver reuse */ 1519 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1520 pcbddc->onearnullspace = nearnullsp; 1521 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1522 for (i=0;i<nnsp_size;i++) { 1523 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1524 } 1525 } else { /* if near null space is not provided BDDC uses constants by default */ 1526 nnsp_size = 0; 1527 nnsp_has_cnst = PETSC_TRUE; 1528 } 1529 /* get max number of constraints on a single cc */ 1530 max_constraints = nnsp_size; 1531 if (nnsp_has_cnst) max_constraints++; 1532 1533 /* 1534 Evaluate maximum storage size needed by the procedure 1535 - temp_indices will contain start index of each constraint stored as follows 1536 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1537 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1538 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1539 */ 1540 total_counts = n_ISForFaces+n_ISForEdges; 1541 total_counts *= max_constraints; 1542 n_vertices = 0; 1543 if (ISForVertices) { 1544 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1545 } 1546 total_counts += n_vertices; 1547 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1548 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1549 total_counts = 0; 1550 max_size_of_constraint = 0; 1551 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1552 if (i<n_ISForEdges) { 1553 used_IS = &ISForEdges[i]; 1554 } else { 1555 used_IS = &ISForFaces[i-n_ISForEdges]; 1556 } 1557 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1558 total_counts += j; 1559 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1560 } 1561 total_counts *= max_constraints; 1562 total_counts += n_vertices; 1563 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1564 /* get local part of global near null space vectors */ 1565 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1566 for (k=0;k<nnsp_size;k++) { 1567 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1568 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1570 } 1571 1572 /* whether or not to skip lapack calls */ 1573 skip_lapack = PETSC_TRUE; 1574 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1575 1576 /* allocate some auxiliary stuff */ 1577 if (!skip_lapack || pcbddc->use_qr_single) { 1578 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1579 } else { 1580 gidxs = NULL; 1581 permutation = NULL; 1582 temp_indices_to_constraint_work = NULL; 1583 temp_quadrature_constraint_work = NULL; 1584 } 1585 1586 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1587 if (!skip_lapack) { 1588 PetscScalar temp_work; 1589 1590 #if defined(PETSC_MISSING_LAPACK_GESVD) 1591 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1592 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1593 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1594 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1595 #if defined(PETSC_USE_COMPLEX) 1596 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1597 #endif 1598 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1599 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1600 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1601 lwork = -1; 1602 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1603 #if !defined(PETSC_USE_COMPLEX) 1604 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1605 #else 1606 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1607 #endif 1608 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1609 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1610 #else /* on missing GESVD */ 1611 /* SVD */ 1612 PetscInt max_n,min_n; 1613 max_n = max_size_of_constraint; 1614 min_n = max_constraints; 1615 if (max_size_of_constraint < max_constraints) { 1616 min_n = max_size_of_constraint; 1617 max_n = max_constraints; 1618 } 1619 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1620 #if defined(PETSC_USE_COMPLEX) 1621 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1622 #endif 1623 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1624 lwork = -1; 1625 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1626 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1627 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1628 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1629 #if !defined(PETSC_USE_COMPLEX) 1630 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1631 #else 1632 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1633 #endif 1634 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1635 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1636 #endif /* on missing GESVD */ 1637 /* Allocate optimal workspace */ 1638 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1639 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1640 } 1641 /* Now we can loop on constraining sets */ 1642 total_counts = 0; 1643 temp_indices[0] = 0; 1644 /* vertices */ 1645 if (ISForVertices) { 1646 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1647 if (nnsp_has_cnst) { /* consider all vertices */ 1648 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1649 for (i=0;i<n_vertices;i++) { 1650 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1651 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1652 total_counts++; 1653 } 1654 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1655 PetscBool used_vertex; 1656 for (i=0;i<n_vertices;i++) { 1657 used_vertex = PETSC_FALSE; 1658 k = 0; 1659 while (!used_vertex && k<nnsp_size) { 1660 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1661 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1662 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1663 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1664 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1665 total_counts++; 1666 used_vertex = PETSC_TRUE; 1667 } 1668 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1669 k++; 1670 } 1671 } 1672 } 1673 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1674 n_vertices = total_counts; 1675 } 1676 1677 /* edges and faces */ 1678 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1679 if (ncc<n_ISForEdges) { 1680 used_IS = &ISForEdges[ncc]; 1681 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1682 } else { 1683 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1684 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1685 } 1686 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1687 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1688 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1689 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1690 /* change of basis should not be performed on local periodic nodes */ 1691 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1692 if (nnsp_has_cnst) { 1693 PetscScalar quad_value; 1694 temp_constraints++; 1695 if (!pcbddc->use_nnsp_true) { 1696 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1697 } else { 1698 quad_value = 1.0; 1699 } 1700 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1701 for (j=0;j<size_of_constraint;j++) { 1702 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1703 } 1704 /* sort by global ordering if using lapack subroutines */ 1705 if (!skip_lapack || pcbddc->use_qr_single) { 1706 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1707 for (j=0;j<size_of_constraint;j++) { 1708 permutation[j]=j; 1709 } 1710 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1711 for (j=0;j<size_of_constraint;j++) { 1712 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1713 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1714 } 1715 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1716 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1717 } 1718 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1719 total_counts++; 1720 } 1721 for (k=0;k<nnsp_size;k++) { 1722 PetscReal real_value; 1723 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1724 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1725 for (j=0;j<size_of_constraint;j++) { 1726 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1727 } 1728 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1729 /* check if array is null on the connected component */ 1730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1731 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1732 if (real_value > 0.0) { /* keep indices and values */ 1733 /* sort by global ordering if using lapack subroutines */ 1734 if (!skip_lapack || pcbddc->use_qr_single) { 1735 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1736 for (j=0;j<size_of_constraint;j++) { 1737 permutation[j]=j; 1738 } 1739 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1740 for (j=0;j<size_of_constraint;j++) { 1741 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1742 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1743 } 1744 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1745 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1746 } 1747 temp_constraints++; 1748 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1749 total_counts++; 1750 } 1751 } 1752 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1753 valid_constraints = temp_constraints; 1754 if (!pcbddc->use_nnsp_true && temp_constraints) { 1755 if (temp_constraints == 1) { /* just normalize the constraint */ 1756 PetscScalar norm; 1757 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1758 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1759 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1760 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1761 } else { /* perform SVD */ 1762 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1763 1764 #if defined(PETSC_MISSING_LAPACK_GESVD) 1765 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1766 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1767 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1768 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1769 from that computed using LAPACKgesvd 1770 -> This is due to a different computation of eigenvectors in LAPACKheev 1771 -> The quality of the POD-computed basis will be the same */ 1772 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1773 /* Store upper triangular part of correlation matrix */ 1774 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1775 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1776 for (j=0;j<temp_constraints;j++) { 1777 for (k=0;k<j+1;k++) { 1778 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1779 } 1780 } 1781 /* compute eigenvalues and eigenvectors of correlation matrix */ 1782 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1783 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1784 #if !defined(PETSC_USE_COMPLEX) 1785 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1786 #else 1787 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1788 #endif 1789 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1790 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1791 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1792 j = 0; 1793 while (j < temp_constraints && singular_vals[j] < tol) j++; 1794 total_counts = total_counts-j; 1795 valid_constraints = temp_constraints-j; 1796 /* scale and copy POD basis into used quadrature memory */ 1797 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1798 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1799 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1800 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1801 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1802 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1803 if (j<temp_constraints) { 1804 PetscInt ii; 1805 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1806 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1807 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1808 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1809 for (k=0;k<temp_constraints-j;k++) { 1810 for (ii=0;ii<size_of_constraint;ii++) { 1811 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1812 } 1813 } 1814 } 1815 #else /* on missing GESVD */ 1816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1817 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1818 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1819 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1820 #if !defined(PETSC_USE_COMPLEX) 1821 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1822 #else 1823 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1824 #endif 1825 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1826 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1827 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1828 k = temp_constraints; 1829 if (k > size_of_constraint) k = size_of_constraint; 1830 j = 0; 1831 while (j < k && singular_vals[k-j-1] < tol) j++; 1832 valid_constraints = k-j; 1833 total_counts = total_counts-temp_constraints+valid_constraints; 1834 #endif /* on missing GESVD */ 1835 } 1836 } 1837 /* setting change_of_basis flag is safe now */ 1838 if (boolforchange) { 1839 for (j=0;j<valid_constraints;j++) { 1840 PetscBTSet(change_basis,total_counts-j-1); 1841 } 1842 } 1843 } 1844 /* free index sets of faces, edges and vertices */ 1845 for (i=0;i<n_ISForFaces;i++) { 1846 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1847 } 1848 if (n_ISForFaces) { 1849 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1850 } 1851 for (i=0;i<n_ISForEdges;i++) { 1852 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1853 } 1854 if (n_ISForEdges) { 1855 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1856 } 1857 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1858 /* map temp_indices_to_constraint in boundary numbering */ 1859 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1860 if (i != temp_indices[total_counts]) { 1861 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1862 } 1863 1864 /* free workspace */ 1865 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 1866 if (!skip_lapack) { 1867 ierr = PetscFree(work);CHKERRQ(ierr); 1868 #if defined(PETSC_USE_COMPLEX) 1869 ierr = PetscFree(rwork);CHKERRQ(ierr); 1870 #endif 1871 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1872 #if defined(PETSC_MISSING_LAPACK_GESVD) 1873 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1874 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1875 #endif 1876 } 1877 for (k=0;k<nnsp_size;k++) { 1878 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1879 } 1880 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1881 1882 /* set quantities in pcbddc data structure and store previous primal size */ 1883 /* n_vertices defines the number of subdomain corners in the primal space */ 1884 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1885 olocal_primal_size = pcbddc->local_primal_size; 1886 pcbddc->local_primal_size = total_counts; 1887 pcbddc->n_vertices = n_vertices; 1888 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1889 1890 /* Create constraint matrix */ 1891 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1892 /* so we need to set it up properly either with or without change of basis */ 1893 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1894 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1895 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1896 /* array to compute a local numbering of constraints : vertices first then constraints */ 1897 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1898 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1899 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1900 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1901 /* auxiliary stuff for basis change */ 1902 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1903 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1904 1905 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1906 total_primal_vertices=0; 1907 for (i=0;i<pcbddc->local_primal_size;i++) { 1908 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1909 if (size_of_constraint == 1) { 1910 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1911 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1912 aux_primal_minloc[total_primal_vertices]=0; 1913 total_primal_vertices++; 1914 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1915 PetscInt min_loc,min_index; 1916 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1917 /* find first untouched local node */ 1918 k = 0; 1919 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1920 min_index = global_indices[k]; 1921 min_loc = k; 1922 /* search the minimum among global nodes already untouched on the cc */ 1923 for (k=1;k<size_of_constraint;k++) { 1924 /* there can be more than one constraint on a single connected component */ 1925 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1926 min_index = global_indices[k]; 1927 min_loc = k; 1928 } 1929 } 1930 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1931 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1932 aux_primal_minloc[total_primal_vertices]=min_loc; 1933 total_primal_vertices++; 1934 } 1935 } 1936 /* determine if a QR strategy is needed for change of basis */ 1937 qr_needed = PETSC_FALSE; 1938 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1939 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1940 if (PetscBTLookup(change_basis,i)) { 1941 if (!pcbddc->use_qr_single) { 1942 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1943 j = 0; 1944 for (k=0;k<size_of_constraint;k++) { 1945 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1946 j++; 1947 } 1948 } 1949 /* found more than one primal dof on the cc */ 1950 if (j > 1) { 1951 PetscBTSet(qr_needed_idx,i); 1952 qr_needed = PETSC_TRUE; 1953 } 1954 } else { 1955 PetscBTSet(qr_needed_idx,i); 1956 qr_needed = PETSC_TRUE; 1957 } 1958 } 1959 } 1960 /* free workspace */ 1961 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1962 1963 /* permute indices in order to have a sorted set of vertices */ 1964 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1965 1966 /* nonzero structure of constraint matrix */ 1967 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1968 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1969 j=total_primal_vertices; 1970 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1971 if (!PetscBTLookup(change_basis,i)) { 1972 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1973 j++; 1974 } 1975 } 1976 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1977 ierr = PetscFree(nnz);CHKERRQ(ierr); 1978 /* set values in constraint matrix */ 1979 for (i=0;i<total_primal_vertices;i++) { 1980 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1981 } 1982 total_counts = total_primal_vertices; 1983 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1984 if (!PetscBTLookup(change_basis,i)) { 1985 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1986 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1987 total_counts++; 1988 } 1989 } 1990 /* assembling */ 1991 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1992 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1993 /* 1994 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1995 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1996 */ 1997 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1998 if (pcbddc->use_change_of_basis) { 1999 /* dual and primal dofs on a single cc */ 2000 PetscInt dual_dofs,primal_dofs; 2001 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2002 PetscInt primal_counter; 2003 /* working stuff for GEQRF */ 2004 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2005 PetscBLASInt lqr_work; 2006 /* working stuff for UNGQR */ 2007 PetscScalar *gqr_work,lgqr_work_t; 2008 PetscBLASInt lgqr_work; 2009 /* working stuff for TRTRS */ 2010 PetscScalar *trs_rhs; 2011 PetscBLASInt Blas_NRHS; 2012 /* pointers for values insertion into change of basis matrix */ 2013 PetscInt *start_rows,*start_cols; 2014 PetscScalar *start_vals; 2015 /* working stuff for values insertion */ 2016 PetscBT is_primal; 2017 /* matrix sizes */ 2018 PetscInt global_size,local_size; 2019 /* work array for nonzeros */ 2020 PetscScalar *nnz_array; 2021 /* temporary change of basis */ 2022 Mat localChangeOfBasisMatrix; 2023 /* auxiliary work for global change of basis */ 2024 Vec nnz_vec; 2025 PetscInt *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz; 2026 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 2027 PetscScalar *vals; 2028 PetscBool done; 2029 2030 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2031 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2032 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2033 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2034 2035 /* nonzeros for local mat */ 2036 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 2037 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 2038 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2039 if (PetscBTLookup(change_basis,i)) { 2040 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2041 if (PetscBTLookup(qr_needed_idx,i)) { 2042 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2043 } else { 2044 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 2045 /* get local primal index on the cc */ 2046 j = 0; 2047 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2048 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2049 } 2050 } 2051 } 2052 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2053 /* Set initial identity in the matrix */ 2054 for (i=0;i<pcis->n_B;i++) { 2055 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2056 } 2057 2058 if (pcbddc->dbg_flag) { 2059 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2060 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2061 } 2062 2063 2064 /* Now we loop on the constraints which need a change of basis */ 2065 /* 2066 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2067 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2068 2069 Basic blocks of change of basis matrix T computed by 2070 2071 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2072 2073 | 1 0 ... 0 s_1/S | 2074 | 0 1 ... 0 s_2/S | 2075 | ... | 2076 | 0 ... 1 s_{n-1}/S | 2077 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2078 2079 with S = \sum_{i=1}^n s_i^2 2080 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2081 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2082 2083 - QR decomposition of constraints otherwise 2084 */ 2085 if (qr_needed) { 2086 /* space to store Q */ 2087 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2088 /* first we issue queries for optimal work */ 2089 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2090 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2091 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2092 lqr_work = -1; 2093 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2094 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2095 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2096 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2097 lgqr_work = -1; 2098 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2099 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2100 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2101 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2102 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2103 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2104 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2105 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2106 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2107 /* array to store scaling factors for reflectors */ 2108 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2109 /* array to store rhs and solution of triangular solver */ 2110 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2111 /* allocating workspace for check */ 2112 if (pcbddc->dbg_flag) { 2113 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2114 } 2115 } 2116 /* array to store whether a node is primal or not */ 2117 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2118 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2119 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2120 if (i != total_primal_vertices) { 2121 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2122 } 2123 for (i=0;i<total_primal_vertices;i++) { 2124 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2125 } 2126 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2127 2128 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2129 /* -> using implicit ordering contained in temp_indices data */ 2130 total_counts = pcbddc->n_vertices; 2131 primal_counter = total_counts; 2132 while (total_counts<pcbddc->local_primal_size) { 2133 primal_dofs = 1; 2134 if (PetscBTLookup(change_basis,total_counts)) { 2135 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2136 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2137 primal_dofs++; 2138 } 2139 /* get constraint info */ 2140 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2141 dual_dofs = size_of_constraint-primal_dofs; 2142 2143 if (pcbddc->dbg_flag) { 2144 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2145 } 2146 2147 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2148 2149 /* copy quadrature constraints for change of basis check */ 2150 if (pcbddc->dbg_flag) { 2151 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2152 } 2153 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2154 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2155 2156 /* compute QR decomposition of constraints */ 2157 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2158 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2159 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2160 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2161 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2162 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2163 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2164 2165 /* explictly compute R^-T */ 2166 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2167 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2168 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2169 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2170 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2171 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2172 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2173 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2174 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2175 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2176 2177 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2178 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2179 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2180 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2181 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2182 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2183 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2184 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2185 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2186 2187 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2188 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2189 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2190 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2191 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2192 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2193 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2194 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2195 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2196 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2197 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2198 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2199 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2200 2201 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2202 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2203 /* insert cols for primal dofs */ 2204 for (j=0;j<primal_dofs;j++) { 2205 start_vals = &qr_basis[j*size_of_constraint]; 2206 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2207 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2208 } 2209 /* insert cols for dual dofs */ 2210 for (j=0,k=0;j<dual_dofs;k++) { 2211 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2212 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2213 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2214 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2215 j++; 2216 } 2217 } 2218 2219 /* check change of basis */ 2220 if (pcbddc->dbg_flag) { 2221 PetscInt ii,jj; 2222 PetscBool valid_qr=PETSC_TRUE; 2223 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2224 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2225 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2226 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2227 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2228 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2229 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2230 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2231 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2232 for (jj=0;jj<size_of_constraint;jj++) { 2233 for (ii=0;ii<primal_dofs;ii++) { 2234 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2235 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2236 } 2237 } 2238 if (!valid_qr) { 2239 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2240 for (jj=0;jj<size_of_constraint;jj++) { 2241 for (ii=0;ii<primal_dofs;ii++) { 2242 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2243 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2244 } 2245 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2246 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2247 } 2248 } 2249 } 2250 } else { 2251 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2252 } 2253 } 2254 } else { /* simple transformation block */ 2255 PetscInt row,col; 2256 PetscScalar val,norm; 2257 2258 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2259 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2260 for (j=0;j<size_of_constraint;j++) { 2261 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2262 if (!PetscBTLookup(is_primal,row)) { 2263 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2264 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2265 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2266 } else { 2267 for (k=0;k<size_of_constraint;k++) { 2268 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2269 if (row != col) { 2270 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2271 } else { 2272 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2273 } 2274 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2275 } 2276 } 2277 } 2278 if (pcbddc->dbg_flag) { 2279 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2280 } 2281 } 2282 /* increment primal counter */ 2283 primal_counter += primal_dofs; 2284 } else { 2285 if (pcbddc->dbg_flag) { 2286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2287 } 2288 } 2289 /* increment constraint counter total_counts */ 2290 total_counts += primal_dofs; 2291 } 2292 2293 /* free workspace */ 2294 if (qr_needed) { 2295 if (pcbddc->dbg_flag) { 2296 ierr = PetscFree(work);CHKERRQ(ierr); 2297 } 2298 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2299 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2300 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2301 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2302 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2303 } 2304 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2305 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2306 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2307 2308 /* assembling of global change of variable */ 2309 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2310 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2311 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2312 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2313 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2314 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2315 2316 /* nonzeros (overestimated) */ 2317 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2318 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2319 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2320 for (i=0;i<pcis->n;i++) { 2321 nnz_array[i] = 1.0; 2322 idxs_all[i] = i; 2323 } 2324 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2325 for (i=0;i<pcis->n_B;i++) { 2326 nnz_array[idxs_B[i]] = nnz[i]; 2327 } 2328 if (pcis->n) { 2329 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2330 } 2331 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2332 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2333 ierr = PetscFree(nnz);CHKERRQ(ierr); 2334 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2335 ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr); 2336 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2337 for (i=0;i<local_size;i++) { 2338 d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size); 2339 o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size); 2340 } 2341 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2342 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2343 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2344 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2345 2346 /* Set identity on dirichlet dofs */ 2347 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2348 for (i=0;i<pcis->n-pcis->n_B;i++) { 2349 PetscScalar one=1.0; 2350 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2351 } 2352 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2353 2354 /* Set values at interface dofs */ 2355 done = PETSC_TRUE; 2356 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2357 if (!done) { 2358 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2359 } 2360 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2361 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2362 ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2363 for (i=0;i<nvtxs;i++) { 2364 PetscInt row,*cols,ncols; 2365 PetscScalar *mat_vals; 2366 2367 row = idxs_B[i]; 2368 ncols = xadj[i+1]-xadj[i]; 2369 cols = idxs_mapped+xadj[i]; 2370 mat_vals = vals+xadj[i]; 2371 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2372 } 2373 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2374 if (!done) { 2375 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2376 } 2377 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2378 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2379 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2380 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2381 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2382 2383 /* check */ 2384 if (pcbddc->dbg_flag) { 2385 PetscReal error; 2386 Vec x,x_change; 2387 2388 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2389 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2390 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2391 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2392 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2393 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2394 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2395 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2396 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2397 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2398 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2399 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2400 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2401 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2402 ierr = VecDestroy(&x);CHKERRQ(ierr); 2403 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2404 } 2405 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2406 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2407 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2408 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2409 } 2410 2411 /* set up change of basis context */ 2412 if (pcbddc->ChangeOfBasisMatrix) { 2413 PCBDDCChange_ctx change_ctx; 2414 2415 if (!pcbddc->new_global_mat) { 2416 PetscInt global_size,local_size; 2417 2418 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2419 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2420 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2421 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2422 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2423 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2424 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2425 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2426 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2427 } else { 2428 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2429 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2430 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2431 } 2432 if (!pcbddc->user_ChangeOfBasisMatrix) { 2433 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2434 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2435 } else { 2436 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2437 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2438 } 2439 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2440 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2441 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2442 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2443 } 2444 2445 /* get indices in local ordering for vertices and constraints */ 2446 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2447 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2448 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2449 } 2450 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2451 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2452 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2453 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2454 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2455 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2456 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2457 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2458 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2459 /* set quantities in PCBDDC data struct */ 2460 pcbddc->n_actual_vertices = i; 2461 /* check if a new primal space has been introduced */ 2462 pcbddc->new_primal_space_local = PETSC_TRUE; 2463 if (olocal_primal_size == pcbddc->local_primal_size) { 2464 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2465 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2466 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2467 } 2468 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2469 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2470 2471 /* flush dbg viewer */ 2472 if (pcbddc->dbg_flag) { 2473 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2474 } 2475 2476 /* free workspace */ 2477 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2478 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2479 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2480 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2481 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2482 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2483 PetscFunctionReturn(0); 2484 } 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2488 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2489 { 2490 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2491 PC_IS *pcis = (PC_IS*)pc->data; 2492 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2493 PetscInt ierr,i,vertex_size; 2494 PetscViewer viewer=pcbddc->dbg_viewer; 2495 2496 PetscFunctionBegin; 2497 /* Reset previously computed graph */ 2498 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2499 /* Init local Graph struct */ 2500 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2501 2502 /* Check validity of the csr graph passed in by the user */ 2503 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2504 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2505 } 2506 2507 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2508 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2509 Mat mat_adj; 2510 PetscInt *xadj,*adjncy; 2511 PetscInt nvtxs; 2512 PetscBool flg_row=PETSC_TRUE; 2513 2514 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2515 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2516 if (!flg_row) { 2517 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2518 } 2519 if (pcbddc->use_local_adj) { 2520 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2521 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2522 } else { /* just compute subdomain's connected components */ 2523 IS is_dummy; 2524 ISLocalToGlobalMapping l2gmap_dummy; 2525 PetscInt j,sum; 2526 PetscInt *cxadj,*cadjncy; 2527 const PetscInt *idxs; 2528 PCBDDCGraph graph; 2529 PetscBT is_on_boundary; 2530 2531 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2532 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2533 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2534 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2535 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2536 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2537 graph->xadj = xadj; 2538 graph->adjncy = adjncy; 2539 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2540 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2541 2542 if (pcbddc->dbg_flag) { 2543 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2544 for (i=0;i<graph->ncc;i++) { 2545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2546 } 2547 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2548 } 2549 2550 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2551 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2552 for (i=0;i<pcis->n_B;i++) { 2553 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2554 } 2555 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2556 2557 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2558 sum = 0; 2559 for (i=0;i<graph->ncc;i++) { 2560 PetscInt sizecc = 0; 2561 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2562 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2563 sizecc++; 2564 } 2565 } 2566 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2567 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2568 cxadj[graph->queue[j]] = sizecc; 2569 } 2570 } 2571 sum += sizecc*sizecc; 2572 } 2573 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2574 sum = 0; 2575 for (i=0;i<nvtxs;i++) { 2576 PetscInt temp = cxadj[i]; 2577 cxadj[i] = sum; 2578 sum += temp; 2579 } 2580 cxadj[nvtxs] = sum; 2581 for (i=0;i<graph->ncc;i++) { 2582 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2583 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2584 PetscInt k,sizecc = 0; 2585 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2586 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2587 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2588 sizecc++; 2589 } 2590 } 2591 } 2592 } 2593 } 2594 if (nvtxs) { 2595 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2596 } else { 2597 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2598 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2599 } 2600 graph->xadj = 0; 2601 graph->adjncy = 0; 2602 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2603 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2604 } 2605 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2606 if (!flg_row) { 2607 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2608 } 2609 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2610 } 2611 2612 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2613 vertex_size = 1; 2614 if (pcbddc->user_provided_isfordofs) { 2615 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2616 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2617 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2618 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2619 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2620 } 2621 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2622 pcbddc->n_ISForDofs = 0; 2623 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2624 } 2625 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2626 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2627 } else { 2628 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2629 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2630 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2631 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2632 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2633 } 2634 } 2635 } 2636 2637 /* Setup of Graph */ 2638 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2639 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2640 } 2641 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2642 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2643 } 2644 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 2645 2646 /* Graph's connected components analysis */ 2647 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2648 2649 /* print some info to stdout */ 2650 if (pcbddc->dbg_flag) { 2651 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 2652 } 2653 2654 /* mark topography has done */ 2655 pcbddc->recompute_topography = PETSC_FALSE; 2656 PetscFunctionReturn(0); 2657 } 2658 2659 #undef __FUNCT__ 2660 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2661 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2662 { 2663 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2664 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2665 PetscErrorCode ierr; 2666 2667 PetscFunctionBegin; 2668 n = 0; 2669 vertices = 0; 2670 if (pcbddc->ConstraintMatrix) { 2671 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2672 for (i=0;i<local_primal_size;i++) { 2673 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2674 if (size_of_constraint == 1) n++; 2675 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2676 } 2677 if (vertices_idx) { 2678 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2679 n = 0; 2680 for (i=0;i<local_primal_size;i++) { 2681 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2682 if (size_of_constraint == 1) { 2683 vertices[n++]=row_cmat_indices[0]; 2684 } 2685 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2686 } 2687 } 2688 } 2689 *n_vertices = n; 2690 if (vertices_idx) *vertices_idx = vertices; 2691 PetscFunctionReturn(0); 2692 } 2693 2694 #undef __FUNCT__ 2695 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2696 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2697 { 2698 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2699 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2700 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2701 PetscBT touched; 2702 PetscErrorCode ierr; 2703 2704 /* This function assumes that the number of local constraints per connected component 2705 is not greater than the number of nodes defined for the connected component 2706 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2707 PetscFunctionBegin; 2708 n = 0; 2709 constraints_index = 0; 2710 if (pcbddc->ConstraintMatrix) { 2711 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2712 max_size_of_constraint = 0; 2713 for (i=0;i<local_primal_size;i++) { 2714 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2715 if (size_of_constraint > 1) { 2716 n++; 2717 } 2718 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2719 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2720 } 2721 if (constraints_idx) { 2722 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2723 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2724 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2725 n = 0; 2726 for (i=0;i<local_primal_size;i++) { 2727 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2728 if (size_of_constraint > 1) { 2729 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2730 /* find first untouched local node */ 2731 j = 0; 2732 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2733 min_index = row_cmat_global_indices[j]; 2734 min_loc = j; 2735 /* search the minimum among nodes not yet touched on the connected component 2736 since there can be more than one constraint on a single cc */ 2737 for (j=1;j<size_of_constraint;j++) { 2738 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2739 min_index = row_cmat_global_indices[j]; 2740 min_loc = j; 2741 } 2742 } 2743 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2744 constraints_index[n++] = row_cmat_indices[min_loc]; 2745 } 2746 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2747 } 2748 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2749 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2750 } 2751 } 2752 *n_constraints = n; 2753 if (constraints_idx) *constraints_idx = constraints_index; 2754 PetscFunctionReturn(0); 2755 } 2756 2757 #undef __FUNCT__ 2758 #define __FUNCT__ "PCBDDCSubsetNumbering" 2759 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2760 { 2761 Vec local_vec,global_vec; 2762 IS seqis,paris; 2763 VecScatter scatter_ctx; 2764 PetscScalar *array; 2765 PetscInt *temp_global_dofs; 2766 PetscScalar globalsum; 2767 PetscInt i,j,s; 2768 PetscInt nlocals,first_index,old_index,max_local,max_global; 2769 PetscMPIInt rank_prec_comm,size_prec_comm; 2770 PetscInt *dof_sizes,*dof_displs; 2771 PetscBool first_found; 2772 PetscErrorCode ierr; 2773 2774 PetscFunctionBegin; 2775 /* mpi buffers */ 2776 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2777 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2778 j = ( !rank_prec_comm ? size_prec_comm : 0); 2779 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 2780 /* get maximum size of subset */ 2781 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2782 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2783 max_local = 0; 2784 for (i=0;i<n_local_dofs;i++) { 2785 if (max_local < temp_global_dofs[i] ) { 2786 max_local = temp_global_dofs[i]; 2787 } 2788 } 2789 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2790 max_global++; 2791 max_local = 0; 2792 for (i=0;i<n_local_dofs;i++) { 2793 if (max_local < local_dofs[i] ) { 2794 max_local = local_dofs[i]; 2795 } 2796 } 2797 max_local++; 2798 /* allocate workspace */ 2799 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2800 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2801 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2802 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2803 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2804 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2805 /* create scatter */ 2806 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2807 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2808 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2809 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2810 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2811 /* init array */ 2812 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2813 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2814 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2815 if (local_dofs_mult) { 2816 for (i=0;i<n_local_dofs;i++) { 2817 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2818 } 2819 } else { 2820 for (i=0;i<n_local_dofs;i++) { 2821 array[local_dofs[i]]=1.0; 2822 } 2823 } 2824 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2825 /* scatter into global vec and get total number of global dofs */ 2826 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2827 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2828 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2829 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2830 /* Fill global_vec with cumulative function for global numbering */ 2831 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2832 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2833 nlocals = 0; 2834 first_index = -1; 2835 first_found = PETSC_FALSE; 2836 for (i=0;i<s;i++) { 2837 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2838 first_found = PETSC_TRUE; 2839 first_index = i; 2840 } 2841 nlocals += (PetscInt)PetscRealPart(array[i]); 2842 } 2843 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2844 if (!rank_prec_comm) { 2845 dof_displs[0]=0; 2846 for (i=1;i<size_prec_comm;i++) { 2847 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2848 } 2849 } 2850 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2851 if (first_found) { 2852 array[first_index] += (PetscScalar)nlocals; 2853 old_index = first_index; 2854 for (i=first_index+1;i<s;i++) { 2855 if (PetscRealPart(array[i]) > 0.1) { 2856 array[i] += array[old_index]; 2857 old_index = i; 2858 } 2859 } 2860 } 2861 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2862 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2863 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2864 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2865 /* get global ordering of local dofs */ 2866 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2867 if (local_dofs_mult) { 2868 for (i=0;i<n_local_dofs;i++) { 2869 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2870 } 2871 } else { 2872 for (i=0;i<n_local_dofs;i++) { 2873 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2874 } 2875 } 2876 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2877 /* free workspace */ 2878 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2879 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2880 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2881 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 2882 /* return pointer to global ordering of local dofs */ 2883 *global_numbering_subset = temp_global_dofs; 2884 PetscFunctionReturn(0); 2885 } 2886 2887 #undef __FUNCT__ 2888 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2889 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2890 { 2891 PetscInt i,j; 2892 PetscScalar *alphas; 2893 PetscErrorCode ierr; 2894 2895 PetscFunctionBegin; 2896 /* this implements stabilized Gram-Schmidt */ 2897 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2898 for (i=0;i<n;i++) { 2899 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2900 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2901 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2902 } 2903 ierr = PetscFree(alphas);CHKERRQ(ierr); 2904 PetscFunctionReturn(0); 2905 } 2906 2907 #undef __FUNCT__ 2908 #define __FUNCT__ "MatISGetSubassemblingPattern" 2909 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2910 { 2911 Mat subdomain_adj; 2912 IS new_ranks,ranks_send_to; 2913 MatPartitioning partitioner; 2914 Mat_IS *matis; 2915 PetscInt n_neighs,*neighs,*n_shared,**shared; 2916 PetscInt prank; 2917 PetscMPIInt size,rank,color; 2918 PetscInt *xadj,*adjncy,*oldranks; 2919 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2920 PetscInt i,local_size,threshold=0; 2921 PetscErrorCode ierr; 2922 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2923 PetscSubcomm subcomm; 2924 2925 PetscFunctionBegin; 2926 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2927 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2928 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2929 2930 /* Get info on mapping */ 2931 matis = (Mat_IS*)(mat->data); 2932 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2933 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2934 2935 /* build local CSR graph of subdomains' connectivity */ 2936 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2937 xadj[0] = 0; 2938 xadj[1] = PetscMax(n_neighs-1,0); 2939 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2940 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2941 2942 if (threshold) { 2943 PetscInt xadj_count = 0; 2944 for (i=1;i<n_neighs;i++) { 2945 if (n_shared[i] > threshold) { 2946 adjncy[xadj_count] = neighs[i]; 2947 adjncy_wgt[xadj_count] = n_shared[i]; 2948 xadj_count++; 2949 } 2950 } 2951 xadj[1] = xadj_count; 2952 } else { 2953 if (xadj[1]) { 2954 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2955 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2956 } 2957 } 2958 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2959 if (use_square) { 2960 for (i=0;i<xadj[1];i++) { 2961 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2962 } 2963 } 2964 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2965 2966 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 2967 2968 /* 2969 Restrict work on active processes only. 2970 */ 2971 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2972 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2973 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2974 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2975 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2976 if (color) { 2977 ierr = PetscFree(xadj);CHKERRQ(ierr); 2978 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2979 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2980 } else { 2981 PetscInt coarsening_ratio; 2982 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 2983 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2984 prank = rank; 2985 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 2986 /* 2987 for (i=0;i<size;i++) { 2988 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2989 } 2990 */ 2991 for (i=0;i<xadj[1];i++) { 2992 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2993 } 2994 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2995 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2996 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2997 2998 /* Partition */ 2999 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3000 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3001 if (use_vwgt) { 3002 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3003 v_wgt[0] = local_size; 3004 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3005 } 3006 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3007 coarsening_ratio = size/n_subdomains; 3008 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3009 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3010 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3011 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3012 3013 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3014 if (contiguous) { 3015 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3016 } else { 3017 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3018 } 3019 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3020 /* clean up */ 3021 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3022 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3023 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3024 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3025 } 3026 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3027 3028 /* assemble parallel IS for sends */ 3029 i = 1; 3030 if (color) i=0; 3031 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3032 3033 /* get back IS */ 3034 *is_sends = ranks_send_to; 3035 PetscFunctionReturn(0); 3036 } 3037 3038 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3039 3040 #undef __FUNCT__ 3041 #define __FUNCT__ "MatISSubassemble" 3042 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3043 { 3044 Mat local_mat; 3045 Mat_IS *matis; 3046 IS is_sends_internal; 3047 PetscInt rows,cols,new_local_rows; 3048 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3049 PetscBool ismatis,isdense,newisdense,destroy_mat; 3050 ISLocalToGlobalMapping l2gmap; 3051 PetscInt* l2gmap_indices; 3052 const PetscInt* is_indices; 3053 MatType new_local_type; 3054 /* buffers */ 3055 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3056 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3057 PetscInt *recv_buffer_idxs_local; 3058 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3059 /* MPI */ 3060 MPI_Comm comm,comm_n; 3061 PetscSubcomm subcomm; 3062 PetscMPIInt n_sends,n_recvs,commsize; 3063 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3064 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3065 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3066 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3067 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3068 PetscErrorCode ierr; 3069 3070 PetscFunctionBegin; 3071 /* TODO: add missing checks */ 3072 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3073 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3074 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3075 PetscValidLogicalCollectiveInt(mat,nis,7); 3076 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3077 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3078 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3079 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3080 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3081 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3082 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3083 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3084 PetscInt mrows,mcols,mnrows,mncols; 3085 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3086 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3087 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3088 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3089 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3090 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3091 } 3092 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3093 PetscValidLogicalCollectiveInt(mat,bs,0); 3094 /* prepare IS for sending if not provided */ 3095 if (!is_sends) { 3096 PetscBool pcontig = PETSC_TRUE; 3097 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3098 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3099 } else { 3100 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3101 is_sends_internal = is_sends; 3102 } 3103 3104 /* get pointer of MATIS data */ 3105 matis = (Mat_IS*)mat->data; 3106 3107 /* get comm */ 3108 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3109 3110 /* compute number of sends */ 3111 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3112 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3113 3114 /* compute number of receives */ 3115 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3116 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3117 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3118 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3119 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3120 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3121 ierr = PetscFree(iflags);CHKERRQ(ierr); 3122 3123 /* restrict comm if requested */ 3124 subcomm = 0; 3125 destroy_mat = PETSC_FALSE; 3126 if (restrict_comm) { 3127 PetscMPIInt color,rank,subcommsize; 3128 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3129 color = 0; 3130 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3131 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3132 subcommsize = commsize - subcommsize; 3133 /* check if reuse has been requested */ 3134 if (reuse == MAT_REUSE_MATRIX) { 3135 if (*mat_n) { 3136 PetscMPIInt subcommsize2; 3137 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3138 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3139 comm_n = PetscObjectComm((PetscObject)*mat_n); 3140 } else { 3141 comm_n = PETSC_COMM_SELF; 3142 } 3143 } else { /* MAT_INITIAL_MATRIX */ 3144 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3145 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3146 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3147 comm_n = PetscSubcommChild(subcomm); 3148 } 3149 /* flag to destroy *mat_n if not significative */ 3150 if (color) destroy_mat = PETSC_TRUE; 3151 } else { 3152 comm_n = comm; 3153 } 3154 3155 /* prepare send/receive buffers */ 3156 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3157 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3158 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3159 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3160 if (nis) { 3161 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3162 } 3163 3164 /* Get data from local matrices */ 3165 if (!isdense) { 3166 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3167 /* TODO: See below some guidelines on how to prepare the local buffers */ 3168 /* 3169 send_buffer_vals should contain the raw values of the local matrix 3170 send_buffer_idxs should contain: 3171 - MatType_PRIVATE type 3172 - PetscInt size_of_l2gmap 3173 - PetscInt global_row_indices[size_of_l2gmap] 3174 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3175 */ 3176 } else { 3177 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3178 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3179 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3180 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3181 send_buffer_idxs[1] = i; 3182 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3183 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3184 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3185 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3186 for (i=0;i<n_sends;i++) { 3187 ilengths_vals[is_indices[i]] = len*len; 3188 ilengths_idxs[is_indices[i]] = len+2; 3189 } 3190 } 3191 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3192 /* additional is (if any) */ 3193 if (nis) { 3194 PetscMPIInt psum; 3195 PetscInt j; 3196 for (j=0,psum=0;j<nis;j++) { 3197 PetscInt plen; 3198 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3199 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3200 psum += len+1; /* indices + lenght */ 3201 } 3202 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3203 for (j=0,psum=0;j<nis;j++) { 3204 PetscInt plen; 3205 const PetscInt *is_array_idxs; 3206 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3207 send_buffer_idxs_is[psum] = plen; 3208 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3209 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3210 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3211 psum += plen+1; /* indices + lenght */ 3212 } 3213 for (i=0;i<n_sends;i++) { 3214 ilengths_idxs_is[is_indices[i]] = psum; 3215 } 3216 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3217 } 3218 3219 buf_size_idxs = 0; 3220 buf_size_vals = 0; 3221 buf_size_idxs_is = 0; 3222 for (i=0;i<n_recvs;i++) { 3223 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3224 buf_size_vals += (PetscInt)olengths_vals[i]; 3225 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3226 } 3227 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3228 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3229 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3230 3231 /* get new tags for clean communications */ 3232 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3233 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3234 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3235 3236 /* allocate for requests */ 3237 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3238 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3239 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3240 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3241 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3242 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3243 3244 /* communications */ 3245 ptr_idxs = recv_buffer_idxs; 3246 ptr_vals = recv_buffer_vals; 3247 ptr_idxs_is = recv_buffer_idxs_is; 3248 for (i=0;i<n_recvs;i++) { 3249 source_dest = onodes[i]; 3250 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3251 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3252 ptr_idxs += olengths_idxs[i]; 3253 ptr_vals += olengths_vals[i]; 3254 if (nis) { 3255 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3256 ptr_idxs_is += olengths_idxs_is[i]; 3257 } 3258 } 3259 for (i=0;i<n_sends;i++) { 3260 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3261 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3262 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3263 if (nis) { 3264 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3265 } 3266 } 3267 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3268 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3269 3270 /* assemble new l2g map */ 3271 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3272 ptr_idxs = recv_buffer_idxs; 3273 new_local_rows = 0; 3274 for (i=0;i<n_recvs;i++) { 3275 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3276 ptr_idxs += olengths_idxs[i]; 3277 } 3278 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3279 ptr_idxs = recv_buffer_idxs; 3280 new_local_rows = 0; 3281 for (i=0;i<n_recvs;i++) { 3282 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3283 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3284 ptr_idxs += olengths_idxs[i]; 3285 } 3286 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3287 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3288 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3289 3290 /* infer new local matrix type from received local matrices type */ 3291 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3292 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3293 if (n_recvs) { 3294 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3295 ptr_idxs = recv_buffer_idxs; 3296 for (i=0;i<n_recvs;i++) { 3297 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3298 new_local_type_private = MATAIJ_PRIVATE; 3299 break; 3300 } 3301 ptr_idxs += olengths_idxs[i]; 3302 } 3303 switch (new_local_type_private) { 3304 case MATDENSE_PRIVATE: 3305 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3306 new_local_type = MATSEQAIJ; 3307 bs = 1; 3308 } else { /* if I receive only 1 dense matrix */ 3309 new_local_type = MATSEQDENSE; 3310 bs = 1; 3311 } 3312 break; 3313 case MATAIJ_PRIVATE: 3314 new_local_type = MATSEQAIJ; 3315 bs = 1; 3316 break; 3317 case MATBAIJ_PRIVATE: 3318 new_local_type = MATSEQBAIJ; 3319 break; 3320 case MATSBAIJ_PRIVATE: 3321 new_local_type = MATSEQSBAIJ; 3322 break; 3323 default: 3324 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3325 break; 3326 } 3327 } else { /* by default, new_local_type is seqdense */ 3328 new_local_type = MATSEQDENSE; 3329 bs = 1; 3330 } 3331 3332 /* create MATIS object if needed */ 3333 if (reuse == MAT_INITIAL_MATRIX) { 3334 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3335 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3336 } else { 3337 /* it also destroys the local matrices */ 3338 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3339 } 3340 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3341 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3342 3343 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3344 3345 /* Global to local map of received indices */ 3346 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3347 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3348 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3349 3350 /* restore attributes -> type of incoming data and its size */ 3351 buf_size_idxs = 0; 3352 for (i=0;i<n_recvs;i++) { 3353 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3354 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3355 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3356 } 3357 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3358 3359 /* set preallocation */ 3360 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3361 if (!newisdense) { 3362 PetscInt *new_local_nnz=0; 3363 3364 ptr_vals = recv_buffer_vals; 3365 ptr_idxs = recv_buffer_idxs_local; 3366 if (n_recvs) { 3367 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3368 } 3369 for (i=0;i<n_recvs;i++) { 3370 PetscInt j; 3371 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3372 for (j=0;j<*(ptr_idxs+1);j++) { 3373 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3374 } 3375 } else { 3376 /* TODO */ 3377 } 3378 ptr_idxs += olengths_idxs[i]; 3379 } 3380 if (new_local_nnz) { 3381 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3382 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3383 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3384 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3385 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3386 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3387 } else { 3388 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3389 } 3390 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3391 } else { 3392 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3393 } 3394 3395 /* set values */ 3396 ptr_vals = recv_buffer_vals; 3397 ptr_idxs = recv_buffer_idxs_local; 3398 for (i=0;i<n_recvs;i++) { 3399 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3400 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3401 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3402 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3403 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3404 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3405 } else { 3406 /* TODO */ 3407 } 3408 ptr_idxs += olengths_idxs[i]; 3409 ptr_vals += olengths_vals[i]; 3410 } 3411 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3412 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3413 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3414 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3415 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3416 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3417 3418 #if 0 3419 if (!restrict_comm) { /* check */ 3420 Vec lvec,rvec; 3421 PetscReal infty_error; 3422 3423 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3424 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3425 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3426 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3427 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3428 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3429 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3430 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3431 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3432 } 3433 #endif 3434 3435 /* assemble new additional is (if any) */ 3436 if (nis) { 3437 PetscInt **temp_idxs,*count_is,j,psum; 3438 3439 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3440 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3441 ptr_idxs = recv_buffer_idxs_is; 3442 psum = 0; 3443 for (i=0;i<n_recvs;i++) { 3444 for (j=0;j<nis;j++) { 3445 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3446 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3447 psum += plen; 3448 ptr_idxs += plen+1; /* shift pointer to received data */ 3449 } 3450 } 3451 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3452 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3453 for (i=1;i<nis;i++) { 3454 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3455 } 3456 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3457 ptr_idxs = recv_buffer_idxs_is; 3458 for (i=0;i<n_recvs;i++) { 3459 for (j=0;j<nis;j++) { 3460 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3461 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3462 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3463 ptr_idxs += plen+1; /* shift pointer to received data */ 3464 } 3465 } 3466 for (i=0;i<nis;i++) { 3467 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3468 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3469 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3470 } 3471 ierr = PetscFree(count_is);CHKERRQ(ierr); 3472 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3473 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3474 } 3475 /* free workspace */ 3476 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3477 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3478 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3479 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3480 if (isdense) { 3481 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3482 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3483 } else { 3484 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3485 } 3486 if (nis) { 3487 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3488 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3489 } 3490 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3491 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3492 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3493 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3494 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3495 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3496 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3497 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3498 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3499 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3500 ierr = PetscFree(onodes);CHKERRQ(ierr); 3501 if (nis) { 3502 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3503 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3504 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3505 } 3506 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3507 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3508 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3509 for (i=0;i<nis;i++) { 3510 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3511 } 3512 } 3513 PetscFunctionReturn(0); 3514 } 3515 3516 /* temporary hack into ksp private data structure */ 3517 #include <petsc/private/kspimpl.h> 3518 3519 #undef __FUNCT__ 3520 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3521 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3522 { 3523 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3524 PC_IS *pcis = (PC_IS*)pc->data; 3525 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3526 MatNullSpace CoarseNullSpace=NULL; 3527 ISLocalToGlobalMapping coarse_islg; 3528 IS coarse_is,*isarray; 3529 PetscInt i,im_active=-1,active_procs=-1; 3530 PetscInt nis,nisdofs,nisneu; 3531 PC pc_temp; 3532 PCType coarse_pc_type; 3533 KSPType coarse_ksp_type; 3534 PetscBool multilevel_requested,multilevel_allowed; 3535 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3536 Mat t_coarse_mat_is; 3537 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3538 PetscMPIInt all_procs; 3539 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3540 PetscBool compute_vecs = PETSC_FALSE; 3541 PetscScalar *array; 3542 PetscErrorCode ierr; 3543 3544 PetscFunctionBegin; 3545 /* Assign global numbering to coarse dofs */ 3546 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3547 PetscInt ocoarse_size; 3548 compute_vecs = PETSC_TRUE; 3549 ocoarse_size = pcbddc->coarse_size; 3550 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3551 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3552 /* see if we can avoid some work */ 3553 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3554 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3555 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3556 coarse_reuse = PETSC_FALSE; 3557 } else { /* we can safely reuse already computed coarse matrix */ 3558 coarse_reuse = PETSC_TRUE; 3559 } 3560 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3561 coarse_reuse = PETSC_FALSE; 3562 } 3563 /* reset any subassembling information */ 3564 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3565 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3566 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3567 coarse_reuse = PETSC_TRUE; 3568 } 3569 3570 /* count "active" (i.e. with positive local size) and "void" processes */ 3571 im_active = !!(pcis->n); 3572 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3573 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3574 void_procs = all_procs-active_procs; 3575 csin_type_simple = PETSC_TRUE; 3576 redist = PETSC_FALSE; 3577 if (pcbddc->current_level && void_procs) { 3578 csin_ml = PETSC_TRUE; 3579 ncoarse_ml = void_procs; 3580 csin_ds = PETSC_TRUE; 3581 ncoarse_ds = void_procs; 3582 } else { 3583 csin_ml = PETSC_FALSE; 3584 ncoarse_ml = all_procs; 3585 if (void_procs) { 3586 csin_ds = PETSC_TRUE; 3587 ncoarse_ds = void_procs; 3588 csin_type_simple = PETSC_FALSE; 3589 } else { 3590 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3591 csin_ds = PETSC_TRUE; 3592 ncoarse_ds = pcbddc->redistribute_coarse; 3593 redist = PETSC_TRUE; 3594 } else { 3595 csin_ds = PETSC_FALSE; 3596 ncoarse_ds = all_procs; 3597 } 3598 } 3599 } 3600 3601 /* 3602 test if we can go multilevel: three conditions must be satisfied: 3603 - we have not exceeded the number of levels requested 3604 - we can actually subassemble the active processes 3605 - we can find a suitable number of MPI processes where we can place the subassembled problem 3606 */ 3607 multilevel_allowed = PETSC_FALSE; 3608 multilevel_requested = PETSC_FALSE; 3609 if (pcbddc->current_level < pcbddc->max_levels) { 3610 multilevel_requested = PETSC_TRUE; 3611 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3612 multilevel_allowed = PETSC_FALSE; 3613 } else { 3614 multilevel_allowed = PETSC_TRUE; 3615 } 3616 } 3617 /* determine number of process partecipating to coarse solver */ 3618 if (multilevel_allowed) { 3619 ncoarse = ncoarse_ml; 3620 csin = csin_ml; 3621 } else { 3622 ncoarse = ncoarse_ds; 3623 csin = csin_ds; 3624 } 3625 3626 /* creates temporary l2gmap and IS for coarse indexes */ 3627 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3628 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3629 3630 /* creates temporary MATIS object for coarse matrix */ 3631 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3632 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3633 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3634 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3635 #if 0 3636 { 3637 PetscViewer viewer; 3638 char filename[256]; 3639 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3640 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3641 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3642 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3643 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3644 } 3645 #endif 3646 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3647 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3648 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3649 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3650 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3651 3652 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3653 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3654 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3655 const PetscInt *idxs; 3656 ISLocalToGlobalMapping tmap; 3657 3658 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3659 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3660 /* allocate space for temporary storage */ 3661 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3662 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3663 /* allocate for IS array */ 3664 nisdofs = pcbddc->n_ISForDofsLocal; 3665 nisneu = !!pcbddc->NeumannBoundariesLocal; 3666 nis = nisdofs + nisneu; 3667 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3668 /* dofs splitting */ 3669 for (i=0;i<nisdofs;i++) { 3670 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3671 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3672 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3673 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3674 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3675 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3676 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3677 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3678 } 3679 /* neumann boundaries */ 3680 if (pcbddc->NeumannBoundariesLocal) { 3681 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3682 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3683 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3684 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3685 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3686 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3687 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3688 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3689 } 3690 /* free memory */ 3691 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3692 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3693 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3694 } else { 3695 nis = 0; 3696 nisdofs = 0; 3697 nisneu = 0; 3698 isarray = NULL; 3699 } 3700 /* destroy no longer needed map */ 3701 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3702 3703 /* restrict on coarse candidates (if needed) */ 3704 coarse_mat_is = NULL; 3705 if (csin) { 3706 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3707 if (redist) { 3708 PetscMPIInt rank; 3709 PetscInt spc,n_spc_p1,dest[1]; 3710 3711 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3712 spc = all_procs/pcbddc->redistribute_coarse; 3713 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 3714 if (rank > n_spc_p1*(spc+1)-1) { 3715 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3716 } else { 3717 dest[0] = rank/(spc+1); 3718 } 3719 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3720 } else { 3721 PetscInt j,tissize,*nisindices; 3722 PetscInt *coarse_candidates; 3723 const PetscInt* tisindices; 3724 /* get coarse candidates' ranks in pc communicator */ 3725 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 3726 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3727 for (i=0,j=0;i<all_procs;i++) { 3728 if (!coarse_candidates[i]) { 3729 coarse_candidates[j]=i; 3730 j++; 3731 } 3732 } 3733 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3734 /* get a suitable subassembling pattern */ 3735 if (csin_type_simple) { 3736 PetscMPIInt rank; 3737 PetscInt issize,isidx; 3738 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3739 if (im_active) { 3740 issize = 1; 3741 isidx = (PetscInt)rank; 3742 } else { 3743 issize = 0; 3744 isidx = -1; 3745 } 3746 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3747 } else { 3748 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3749 } 3750 if (pcbddc->dbg_flag) { 3751 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3752 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3753 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3754 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3755 for (i=0;i<j;i++) { 3756 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3757 } 3758 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3759 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3760 } 3761 /* shift the pattern on coarse candidates */ 3762 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3763 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3764 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 3765 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3766 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3767 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3768 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3769 } 3770 } 3771 if (pcbddc->dbg_flag) { 3772 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3773 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3774 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3775 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3776 } 3777 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3778 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3779 } else { 3780 if (pcbddc->dbg_flag) { 3781 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3782 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3783 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3784 } 3785 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3786 coarse_mat_is = t_coarse_mat_is; 3787 } 3788 3789 /* create local to global scatters for coarse problem */ 3790 if (compute_vecs) { 3791 PetscInt lrows; 3792 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3793 if (coarse_mat_is) { 3794 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3795 } else { 3796 lrows = 0; 3797 } 3798 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3799 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3800 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3801 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3802 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3803 } 3804 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3805 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3806 3807 /* set defaults for coarse KSP and PC */ 3808 if (multilevel_allowed) { 3809 coarse_ksp_type = KSPRICHARDSON; 3810 coarse_pc_type = PCBDDC; 3811 } else { 3812 coarse_ksp_type = KSPPREONLY; 3813 coarse_pc_type = PCREDUNDANT; 3814 } 3815 3816 /* print some info if requested */ 3817 if (pcbddc->dbg_flag) { 3818 if (!multilevel_allowed) { 3819 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3820 if (multilevel_requested) { 3821 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3822 } else if (pcbddc->max_levels) { 3823 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3824 } 3825 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3826 } 3827 } 3828 3829 /* create the coarse KSP object only once with defaults */ 3830 if (coarse_mat_is) { 3831 MatReuse coarse_mat_reuse; 3832 PetscViewer dbg_viewer = NULL; 3833 if (pcbddc->dbg_flag) { 3834 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3835 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3836 } 3837 if (!pcbddc->coarse_ksp) { 3838 char prefix[256],str_level[16]; 3839 size_t len; 3840 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3841 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 3842 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3843 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3844 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3845 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3846 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3847 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3848 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3849 /* prefix */ 3850 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3851 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3852 if (!pcbddc->current_level) { 3853 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3854 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3855 } else { 3856 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3857 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3858 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3859 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3860 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3861 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3862 } 3863 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3864 /* allow user customization */ 3865 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3866 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3867 } 3868 3869 /* get some info after set from options */ 3870 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3871 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3872 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3873 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3874 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3875 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3876 isbddc = PETSC_FALSE; 3877 } 3878 if (isredundant) { 3879 KSP inner_ksp; 3880 PC inner_pc; 3881 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3882 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3883 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3884 } 3885 3886 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3887 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3888 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3889 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3890 if (nisdofs) { 3891 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3892 for (i=0;i<nisdofs;i++) { 3893 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3894 } 3895 } 3896 if (nisneu) { 3897 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3898 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3899 } 3900 3901 /* assemble coarse matrix */ 3902 if (coarse_reuse) { 3903 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3904 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3905 coarse_mat_reuse = MAT_REUSE_MATRIX; 3906 } else { 3907 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3908 } 3909 if (isbddc || isnn) { 3910 if (pcbddc->coarsening_ratio > 1) { 3911 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3912 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3913 if (pcbddc->dbg_flag) { 3914 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3915 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3916 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3917 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3918 } 3919 } 3920 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3921 } else { 3922 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 3923 coarse_mat = coarse_mat_is; 3924 } 3925 } else { 3926 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3927 } 3928 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3929 3930 /* propagate symmetry info to coarse matrix */ 3931 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3932 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3933 3934 /* set operators */ 3935 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3936 if (pcbddc->dbg_flag) { 3937 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3938 } 3939 } else { /* processes non partecipating to coarse solver (if any) */ 3940 coarse_mat = 0; 3941 } 3942 ierr = PetscFree(isarray);CHKERRQ(ierr); 3943 #if 0 3944 { 3945 PetscViewer viewer; 3946 char filename[256]; 3947 sprintf(filename,"coarse_mat.m"); 3948 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3949 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3950 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3951 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3952 } 3953 #endif 3954 3955 /* Compute coarse null space (special handling by BDDC only) */ 3956 if (pcbddc->NullSpace) { 3957 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3958 } 3959 3960 if (pcbddc->coarse_ksp) { 3961 Vec crhs,csol; 3962 PetscBool ispreonly; 3963 if (CoarseNullSpace) { 3964 if (isbddc) { 3965 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3966 } else { 3967 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 3968 } 3969 } 3970 /* setup coarse ksp */ 3971 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3972 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3973 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3974 /* hack */ 3975 if (!csol) { 3976 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3977 } 3978 if (!crhs) { 3979 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3980 } 3981 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3982 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3983 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3984 KSP check_ksp; 3985 KSPType check_ksp_type; 3986 PC check_pc; 3987 Vec check_vec,coarse_vec; 3988 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3989 PetscInt its; 3990 PetscBool compute_eigs; 3991 PetscReal *eigs_r,*eigs_c; 3992 PetscInt neigs; 3993 const char *prefix; 3994 3995 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3996 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3997 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 3998 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3999 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4000 if (ispreonly) { 4001 check_ksp_type = KSPPREONLY; 4002 compute_eigs = PETSC_FALSE; 4003 } else { 4004 check_ksp_type = KSPGMRES; 4005 compute_eigs = PETSC_TRUE; 4006 } 4007 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4008 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4009 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4010 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4011 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4012 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4013 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4014 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4015 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4016 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4017 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4018 /* create random vec */ 4019 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4020 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4021 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4022 if (CoarseNullSpace) { 4023 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4024 } 4025 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4026 /* solve coarse problem */ 4027 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4028 if (CoarseNullSpace) { 4029 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4030 } 4031 /* set eigenvalue estimation if preonly has not been requested */ 4032 if (compute_eigs) { 4033 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4034 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4035 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4036 lambda_max = eigs_r[neigs-1]; 4037 lambda_min = eigs_r[0]; 4038 if (pcbddc->use_coarse_estimates) { 4039 if (lambda_max>lambda_min) { 4040 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4041 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4042 } 4043 } 4044 } 4045 4046 /* check coarse problem residual error */ 4047 if (pcbddc->dbg_flag) { 4048 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4049 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4050 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4051 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4052 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4053 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4054 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4055 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4056 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4057 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4058 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4059 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4060 if (compute_eigs) { 4061 PetscReal lambda_max_s,lambda_min_s; 4062 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4063 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4064 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4065 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4066 for (i=0;i<neigs;i++) { 4067 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4068 } 4069 } 4070 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4071 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4072 } 4073 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4074 if (compute_eigs) { 4075 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4076 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4077 } 4078 } 4079 } 4080 /* print additional info */ 4081 if (pcbddc->dbg_flag) { 4082 /* waits until all processes reaches this point */ 4083 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4084 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4086 } 4087 4088 /* free memory */ 4089 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4090 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4091 PetscFunctionReturn(0); 4092 } 4093 4094 #undef __FUNCT__ 4095 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4096 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4097 { 4098 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4099 PC_IS* pcis = (PC_IS*)pc->data; 4100 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4101 PetscInt i,coarse_size; 4102 PetscInt *local_primal_indices; 4103 PetscErrorCode ierr; 4104 4105 PetscFunctionBegin; 4106 /* Compute global number of coarse dofs */ 4107 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4108 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4109 } 4110 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4111 4112 /* check numbering */ 4113 if (pcbddc->dbg_flag) { 4114 PetscScalar coarsesum,*array; 4115 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4116 4117 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4118 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4120 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4121 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4122 for (i=0;i<pcbddc->local_primal_size;i++) { 4123 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4124 } 4125 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4126 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4127 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4128 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4129 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4130 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4131 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4132 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4133 for (i=0;i<pcis->n;i++) { 4134 if (array[i] == 1.0) { 4135 set_error = PETSC_TRUE; 4136 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4137 } 4138 } 4139 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4140 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4141 for (i=0;i<pcis->n;i++) { 4142 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4143 } 4144 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4145 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4146 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4147 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4148 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4150 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4151 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4152 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4153 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4154 for (i=0;i<pcbddc->local_primal_size;i++) { 4155 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 4156 } 4157 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4158 } 4159 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4160 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4161 } 4162 /* get back data */ 4163 *coarse_size_n = coarse_size; 4164 *local_primal_indices_n = local_primal_indices; 4165 PetscFunctionReturn(0); 4166 } 4167 4168 #undef __FUNCT__ 4169 #define __FUNCT__ "PCBDDCGlobalToLocal" 4170 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4171 { 4172 IS localis_t; 4173 PetscInt i,lsize,*idxs,n; 4174 PetscScalar *vals; 4175 PetscErrorCode ierr; 4176 4177 PetscFunctionBegin; 4178 /* get indices in local ordering exploiting local to global map */ 4179 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4180 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4181 for (i=0;i<lsize;i++) vals[i] = 1.0; 4182 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4183 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4184 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4185 if (idxs) { /* multilevel guard */ 4186 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4187 } 4188 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4189 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4190 ierr = PetscFree(vals);CHKERRQ(ierr); 4191 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4192 /* now compute set in local ordering */ 4193 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4194 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4195 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4196 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4197 for (i=0,lsize=0;i<n;i++) { 4198 if (PetscRealPart(vals[i]) > 0.5) { 4199 lsize++; 4200 } 4201 } 4202 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4203 for (i=0,lsize=0;i<n;i++) { 4204 if (PetscRealPart(vals[i]) > 0.5) { 4205 idxs[lsize++] = i; 4206 } 4207 } 4208 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4209 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4210 *localis = localis_t; 4211 PetscFunctionReturn(0); 4212 } 4213 4214 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4215 #undef __FUNCT__ 4216 #define __FUNCT__ "PCBDDCMatMult_Private" 4217 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4218 { 4219 PCBDDCChange_ctx change_ctx; 4220 PetscErrorCode ierr; 4221 4222 PetscFunctionBegin; 4223 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4224 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4225 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4226 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4227 PetscFunctionReturn(0); 4228 } 4229 4230 #undef __FUNCT__ 4231 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4232 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4233 { 4234 PCBDDCChange_ctx change_ctx; 4235 PetscErrorCode ierr; 4236 4237 PetscFunctionBegin; 4238 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4239 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4240 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4241 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4242 PetscFunctionReturn(0); 4243 } 4244