1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCSetUpSolvers" 10 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscScalar *coarse_submat_vals; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 /* Compute matrix after change of basis and extract local submatrices */ 18 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D */ 21 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 22 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 23 24 /* Setup local solvers ksp_D and ksp_R */ 25 /* PCBDDCSetUpLocalScatters should be called first! */ 26 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 27 28 /* Change global null space passed in by the user if change of basis has been requested */ 29 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 30 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 31 } 32 33 /* 34 Setup local correction and local part of coarse basis. 35 Gives back the dense local part of the coarse matrix in column major ordering 36 */ 37 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 38 39 /* Compute total number of coarse nodes and setup coarse solver */ 40 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 41 42 /* free */ 43 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 #undef __FUNCT__ 48 #define __FUNCT__ "PCBDDCResetCustomization" 49 PetscErrorCode PCBDDCResetCustomization(PC pc) 50 { 51 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 57 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 61 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 62 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 63 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 64 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 65 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCResetTopography" 71 PetscErrorCode PCBDDCResetTopography(PC pc) 72 { 73 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 78 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 79 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 80 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 81 PetscFunctionReturn(0); 82 } 83 84 #undef __FUNCT__ 85 #define __FUNCT__ "PCBDDCResetSolvers" 86 PetscErrorCode PCBDDCResetSolvers(PC pc) 87 { 88 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 93 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 100 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 101 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 102 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 104 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 109 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 110 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 111 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 112 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 113 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 114 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 115 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 121 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 122 { 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PC_IS *pcis = (PC_IS*)pc->data; 125 VecType impVecType; 126 PetscInt n_constraints,n_R,old_size; 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 if (!pcbddc->ConstraintMatrix) { 131 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 132 } 133 /* get sizes */ 134 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 135 n_R = pcis->n-pcbddc->n_actual_vertices; 136 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 137 /* local work vectors (try to avoid unneeded work)*/ 138 /* R nodes */ 139 old_size = -1; 140 if (pcbddc->vec1_R) { 141 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 142 } 143 if (n_R != old_size) { 144 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 146 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 147 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 148 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 149 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 150 } 151 /* local primal dofs */ 152 old_size = -1; 153 if (pcbddc->vec1_P) { 154 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 155 } 156 if (pcbddc->local_primal_size != old_size) { 157 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 158 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 159 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 160 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 161 } 162 /* local explicit constraints */ 163 old_size = -1; 164 if (pcbddc->vec1_C) { 165 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 166 } 167 if (n_constraints && n_constraints != old_size) { 168 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 169 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 170 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 171 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 #undef __FUNCT__ 177 #define __FUNCT__ "PCBDDCSetUpCorrection" 178 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 179 { 180 PetscErrorCode ierr; 181 /* pointers to pcis and pcbddc */ 182 PC_IS* pcis = (PC_IS*)pc->data; 183 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 184 /* submatrices of local problem */ 185 Mat A_RV,A_VR,A_VV; 186 /* working matrices */ 187 Mat M1,M2,M3,C_CR; 188 /* working vectors */ 189 Vec vec1_C,vec2_C,vec1_V,vec2_V; 190 /* additional working stuff */ 191 IS is_aux; 192 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 193 const PetscScalar *array,*row_cmat_values; 194 const PetscInt *row_cmat_indices,*idx_R_local; 195 PetscInt *idx_V_B,*auxindices; 196 PetscInt n_vertices,n_constraints,size_of_constraint; 197 PetscInt i,j,n_R,n_D,n_B; 198 PetscBool unsymmetric_check; 199 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 200 MatType impMatType; 201 /* some shortcuts to scalars */ 202 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 203 /* for debugging purposes */ 204 PetscReal *coarsefunctions_errors,*constraints_errors; 205 206 PetscFunctionBegin; 207 /* get number of vertices (corners plus constraints with change of basis) 208 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 209 n_vertices = pcbddc->n_actual_vertices; 210 n_constraints = pcbddc->local_primal_size-n_vertices; 211 /* Set Non-overlapping dimensions */ 212 n_B = pcis->n_B; n_D = pcis->n - n_B; 213 n_R = pcis->n-n_vertices; 214 215 /* Set types for local objects needed by BDDC precondtioner */ 216 impMatType = MATSEQDENSE; 217 218 /* Allocating some extra storage just to be safe */ 219 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 220 for (i=0;i<pcis->n;i++) auxindices[i]=i; 221 222 /* vertices in boundary numbering */ 223 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 224 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 225 if (i != n_vertices) { 226 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 227 } 228 229 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 230 if (n_constraints) { 231 /* see if we can save some allocations */ 232 if (pcbddc->local_auxmat2) { 233 PetscInt on_R,on_constraints; 234 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 235 if (on_R != n_R || on_constraints != n_constraints) { 236 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 237 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 238 } 239 } 240 /* work vectors */ 241 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 242 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 243 /* auxiliary matrices */ 244 if (!pcbddc->local_auxmat2) { 245 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 246 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 247 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 249 } 250 251 /* Extract constraints on R nodes: C_{CR} */ 252 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 253 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 254 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 255 256 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 257 for (i=0;i<n_constraints;i++) { 258 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 259 /* Get row of constraint matrix in R numbering */ 260 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 261 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 262 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 263 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 264 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 265 /* Solve for row of constraint matrix in R numbering */ 266 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 267 /* Set values in local_auxmat2 */ 268 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 269 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 270 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 271 } 272 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 273 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 274 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 275 276 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 277 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 278 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 279 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 280 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 281 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 282 ierr = MatSetUp(M1);CHKERRQ(ierr); 283 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 284 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 285 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 286 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 287 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 288 ierr = MatDestroy(&M2);CHKERRQ(ierr); 289 ierr = MatDestroy(&M3);CHKERRQ(ierr); 290 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 291 if (!pcbddc->local_auxmat1) { 292 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 293 } else { 294 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 295 } 296 } 297 298 /* Get submatrices from subdomain matrix */ 299 if (n_vertices) { 300 PetscInt ibs,mbs; 301 PetscBool issbaij; 302 Mat newmat; 303 304 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 305 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 306 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 307 if (ibs != mbs) { /* need to convert to SEQAIJ */ 308 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 310 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 311 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 313 } else { 314 /* this is safe */ 315 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 316 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 317 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 318 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 319 /* which of the two approaches is faster? */ 320 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 321 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 322 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 324 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 325 } else { 326 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 327 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 328 } 329 } 330 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 331 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 332 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 333 } 334 335 /* Matrix of coarse basis functions (local) */ 336 if (pcbddc->coarse_phi_B) { 337 PetscInt on_B,on_primal; 338 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 339 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 340 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 341 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 342 } 343 } 344 if (pcbddc->coarse_phi_D) { 345 PetscInt on_D,on_primal; 346 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 347 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 348 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 349 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 350 } 351 } 352 if (!pcbddc->coarse_phi_B) { 353 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 355 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 356 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 357 } 358 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 359 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 361 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 362 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 363 } 364 365 if (pcbddc->dbg_flag) { 366 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 367 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 368 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 369 } 370 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 371 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 372 373 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 374 375 /* vertices */ 376 for (i=0;i<n_vertices;i++) { 377 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 378 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 379 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 380 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 381 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 382 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 383 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 384 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 387 if (n_constraints) { 388 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 389 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 390 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 391 } 392 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 393 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 394 395 /* Set values in coarse basis function and subdomain part of coarse_mat */ 396 /* coarse basis functions */ 397 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 398 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 399 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 400 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 402 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 403 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 404 if (pcbddc->switch_static || pcbddc->dbg_flag) { 405 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 406 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 410 } 411 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 412 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 415 if (n_constraints) { 416 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 418 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 419 } 420 421 /* check */ 422 if (pcbddc->dbg_flag) { 423 /* assemble subdomain vector on local nodes */ 424 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 425 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 426 if (n_R) { 427 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 428 } 429 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 430 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 432 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 433 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 434 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 435 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 436 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 437 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 438 if (n_constraints) { 439 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 440 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 441 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 442 } 443 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 444 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 445 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 446 /* check saddle point solution */ 447 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 448 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 449 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 450 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 451 /* shift by the identity matrix */ 452 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 453 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 455 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 456 } 457 } 458 459 /* constraints */ 460 for (i=0;i<n_constraints;i++) { 461 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 462 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 463 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 464 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 465 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 466 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 467 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 468 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 469 if (n_vertices) { 470 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 471 } 472 /* Set values in coarse basis function and subdomain part of coarse_mat */ 473 /* coarse basis functions */ 474 j = i+n_vertices; /* don't touch this! */ 475 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 476 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 479 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 480 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 481 if (pcbddc->switch_static || pcbddc->dbg_flag) { 482 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 483 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 484 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 485 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 487 } 488 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 489 if (n_vertices) { 490 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 491 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 492 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 493 } 494 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 495 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 496 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 497 498 if (pcbddc->dbg_flag) { 499 /* assemble subdomain vector on nodes */ 500 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 501 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 502 if (n_R) { 503 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 504 } 505 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 506 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 507 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 508 /* assemble subdomain vector of lagrange multipliers */ 509 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 510 if (n_vertices) { 511 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 512 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 514 } 515 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 516 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 517 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 518 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 519 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 520 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 521 /* check saddle point solution */ 522 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 523 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 524 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 525 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 526 /* shift by the identity matrix */ 527 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 528 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 529 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 530 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 531 } 532 } 533 /* call assembling routines for local coarse basis */ 534 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 535 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 536 if (pcbddc->switch_static || pcbddc->dbg_flag) { 537 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 538 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 539 } 540 541 /* compute other basis functions for non-symmetric problems */ 542 /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/ 543 { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */ 544 PetscBool setsym; 545 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr); 546 if (!setsym) pcbddc->issym = PETSC_FALSE; 547 } 548 549 if (!pcbddc->issym) { 550 if (!pcbddc->coarse_psi_B) { 551 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 552 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 553 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 554 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 555 } 556 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 557 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 558 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 559 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 560 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 561 } 562 for (i=0;i<pcbddc->local_primal_size;i++) { 563 if (n_constraints) { 564 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 565 for (j=0;j<n_constraints;j++) { 566 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 567 } 568 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 569 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 570 } 571 if (i<n_vertices) { 572 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 573 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 574 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 575 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 576 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 577 if (n_constraints) { 578 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 579 } 580 } else { 581 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 582 } 583 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 584 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 585 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 586 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 587 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 588 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 589 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 590 if (i<n_vertices) { 591 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 592 } 593 if (pcbddc->switch_static || pcbddc->dbg_flag) { 594 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 595 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 596 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 597 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 598 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 599 } 600 601 if (pcbddc->dbg_flag) { 602 /* assemble subdomain vector on nodes */ 603 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 604 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 605 if (n_R) { 606 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 607 } 608 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 609 if (i<n_vertices) { 610 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 611 } 612 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 614 /* assemble subdomain vector of lagrange multipliers */ 615 for (j=0;j<pcbddc->local_primal_size;j++) { 616 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 617 } 618 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 619 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 620 /* check saddle point solution */ 621 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 622 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 623 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 624 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 625 /* shift by the identity matrix */ 626 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 627 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 628 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 629 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 630 } 631 } 632 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 633 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 634 if (pcbddc->switch_static || pcbddc->dbg_flag) { 635 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 636 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 637 } 638 unsymmetric_check = PETSC_TRUE; 639 } else { /* take references to already computed coarse basis */ 640 unsymmetric_check = PETSC_FALSE; 641 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 642 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 643 if (pcbddc->coarse_phi_D) { 644 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 645 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 646 } 647 } 648 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 649 /* Checking coarse_sub_mat and coarse basis functios */ 650 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 651 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 652 if (pcbddc->dbg_flag) { 653 Mat coarse_sub_mat; 654 Mat AUXMAT,TM1,TM2,TM3,TM4; 655 Mat coarse_phi_D,coarse_phi_B; 656 Mat coarse_psi_D,coarse_psi_B; 657 Mat A_II,A_BB,A_IB,A_BI; 658 MatType checkmattype=MATSEQAIJ; 659 PetscReal real_value; 660 661 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 662 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 663 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 664 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 665 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 666 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 667 if (unsymmetric_check) { 668 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 669 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 670 } 671 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 672 673 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 674 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 676 if (unsymmetric_check) { 677 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 684 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 685 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 686 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 687 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 688 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 689 } else { 690 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 691 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 692 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 693 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 694 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 695 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 696 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 697 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 698 } 699 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 700 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 701 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 702 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 703 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 704 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 705 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 710 for (i=0;i<pcbddc->local_primal_size;i++) { 711 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 712 } 713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 714 for (i=0;i<pcbddc->local_primal_size;i++) { 715 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 716 } 717 if (unsymmetric_check) { 718 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 719 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 720 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 721 } 722 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 723 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 724 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 725 } 726 } 727 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 728 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 729 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 730 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 731 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 732 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 733 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 734 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 735 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 736 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 737 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 738 if (unsymmetric_check) { 739 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 740 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 741 } 742 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 743 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 744 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 745 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 746 } 747 /* free memory */ 748 if (n_vertices) { 749 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 750 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 751 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 752 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 753 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 754 } 755 if (n_constraints) { 756 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 757 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 758 ierr = MatDestroy(&M1);CHKERRQ(ierr); 759 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 760 } 761 ierr = PetscFree(auxindices);CHKERRQ(ierr); 762 /* get back data */ 763 *coarse_submat_vals_n = coarse_submat_vals; 764 PetscFunctionReturn(0); 765 } 766 767 #undef __FUNCT__ 768 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 769 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 770 { 771 PC_IS* pcis = (PC_IS*)(pc->data); 772 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 773 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 774 PetscBool issbaij,isseqaij; 775 /* manage repeated solves */ 776 MatReuse reuse; 777 PetscErrorCode ierr; 778 779 PetscFunctionBegin; 780 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 781 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 782 } 783 /* get mat flags */ 784 reuse = MAT_INITIAL_MATRIX; 785 if (pc->setupcalled) { 786 if (pc->flag == SAME_NONZERO_PATTERN) { 787 reuse = MAT_REUSE_MATRIX; 788 } else { 789 reuse = MAT_INITIAL_MATRIX; 790 } 791 } 792 if (reuse == MAT_INITIAL_MATRIX) { 793 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 794 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 795 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 796 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 797 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 798 } 799 800 /* transform local matrices if needed */ 801 if (pcbddc->ChangeOfBasisMatrix) { 802 Mat *change_mat_all; 803 IS is_local,is_global; 804 PetscBool sorted; 805 PetscInt *idxs_perm; 806 807 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr); 808 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 809 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 810 ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr); 811 if (!sorted) { 812 const PetscInt *idxs; 813 PetscInt *idxs_sorted,i; 814 815 ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr); 816 ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr); 817 for (i=0;i<pcis->n;i++) { 818 idxs_perm[i] = i; 819 } 820 ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr); 821 ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr); 822 for (i=0;i<pcis->n;i++) { 823 idxs_sorted[i] = idxs[idxs_perm[i]]; 824 } 825 ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr); 826 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 827 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr); 828 } 829 830 /* get change of basis on the whole set of local dofs */ 831 ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr); 832 833 if (!sorted) { 834 Mat new_mat; 835 IS is_perm; 836 PetscInt *idxs,i; 837 838 ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr); 839 for (i=0;i<pcis->n;i++) { 840 idxs[idxs_perm[i]] = i; 841 } 842 ierr = PetscFree(idxs_perm);CHKERRQ(ierr); 843 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr); 844 ierr = ISSetPermutation(is_perm);CHKERRQ(ierr); 845 ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr); 846 ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr); 847 change_mat_all[0] = new_mat; 848 ierr = ISDestroy(&is_perm);CHKERRQ(ierr); 849 } 850 851 /* check */ 852 if (pcbddc->dbg_flag) { 853 Vec x,x_change; 854 PetscReal error; 855 856 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 857 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 858 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 859 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 860 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 861 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 863 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 864 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 865 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 866 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 867 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 868 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 869 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 870 ierr = VecDestroy(&x);CHKERRQ(ierr); 871 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 872 } 873 874 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 875 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 876 if (isseqaij) { 877 ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 878 } else { 879 Mat work_mat; 880 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 881 ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 882 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 883 } 884 /* 885 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 886 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 887 */ 888 ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr); 889 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 890 } else { 891 /* without change of basis, the local matrix is unchanged */ 892 if (!pcbddc->local_mat) { 893 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 894 pcbddc->local_mat = matis->A; 895 } 896 } 897 898 /* get submatrices */ 899 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 900 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 901 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 902 if (!issbaij) { 903 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 904 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 905 } else { 906 Mat newmat; 907 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 908 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 909 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 910 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 911 } 912 PetscFunctionReturn(0); 913 } 914 915 #undef __FUNCT__ 916 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 917 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 918 { 919 PC_IS* pcis = (PC_IS*)(pc->data); 920 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 921 IS is_aux1,is_aux2; 922 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 923 PetscInt n_vertices,i,j,n_R,n_D,n_B; 924 PetscInt vbs,bs; 925 PetscBT bitmask; 926 PetscErrorCode ierr; 927 928 PetscFunctionBegin; 929 /* 930 No need to setup local scatters if 931 - primal space is unchanged 932 AND 933 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 934 AND 935 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 936 */ 937 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 938 PetscFunctionReturn(0); 939 } 940 /* destroy old objects */ 941 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 942 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 943 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 944 /* Set Non-overlapping dimensions */ 945 n_B = pcis->n_B; n_D = pcis->n - n_B; 946 n_vertices = pcbddc->n_actual_vertices; 947 /* create auxiliary bitmask */ 948 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 949 for (i=0;i<n_vertices;i++) { 950 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 951 } 952 953 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 954 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 955 for (i=0, n_R=0; i<pcis->n; i++) { 956 if (!PetscBTLookup(bitmask,i)) { 957 idx_R_local[n_R] = i; 958 n_R++; 959 } 960 } 961 962 /* Block code */ 963 vbs = 1; 964 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 965 if (bs>1 && !(n_vertices%bs)) { 966 PetscBool is_blocked = PETSC_TRUE; 967 PetscInt *vary; 968 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 969 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 970 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 971 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 972 for (i=0; i<n_vertices; i++) { 973 if (vary[i]!=0 && vary[i]!=bs) { 974 is_blocked = PETSC_FALSE; 975 break; 976 } 977 } 978 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 979 vbs = bs; 980 for (i=0;i<n_R/vbs;i++) { 981 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 982 } 983 } 984 ierr = PetscFree(vary);CHKERRQ(ierr); 985 } 986 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 987 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 988 989 /* print some info if requested */ 990 if (pcbddc->dbg_flag) { 991 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 993 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 995 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 996 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 999 } 1000 1001 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1002 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1003 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1004 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1005 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1006 for (i=0; i<n_D; i++) { 1007 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1008 } 1009 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1010 for (i=0, j=0; i<n_R; i++) { 1011 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1012 aux_array1[j++] = i; 1013 } 1014 } 1015 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1016 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1017 for (i=0, j=0; i<n_B; i++) { 1018 if (!PetscBTLookup(bitmask,is_indices[i])) { 1019 aux_array2[j++] = i; 1020 } 1021 } 1022 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1023 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1024 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1025 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1026 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1027 1028 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1029 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1030 for (i=0, j=0; i<n_R; i++) { 1031 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1032 aux_array1[j++] = i; 1033 } 1034 } 1035 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1036 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1037 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1038 } 1039 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1040 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1047 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1048 { 1049 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1050 PC_IS *pcis = (PC_IS*)pc->data; 1051 PC pc_temp; 1052 Mat A_RR; 1053 MatReuse reuse; 1054 PetscScalar m_one = -1.0; 1055 PetscReal value; 1056 PetscInt n_D,n_R,ibs,mbs; 1057 PetscBool use_exact,use_exact_reduced,issbaij; 1058 PetscErrorCode ierr; 1059 /* prefixes stuff */ 1060 char dir_prefix[256],neu_prefix[256],str_level[16]; 1061 size_t len; 1062 1063 PetscFunctionBegin; 1064 1065 /* compute prefixes */ 1066 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1067 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1068 if (!pcbddc->current_level) { 1069 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1070 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1071 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1072 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1073 } else { 1074 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1075 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1076 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1077 len -= 15; /* remove "pc_bddc_coarse_" */ 1078 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1079 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1080 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1081 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1082 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1083 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1084 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1085 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1086 } 1087 1088 /* DIRICHLET PROBLEM */ 1089 /* Matrix for Dirichlet problem is pcis->A_II */ 1090 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1091 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1092 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1093 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1094 /* default */ 1095 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1096 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1097 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1098 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1099 if (issbaij) { 1100 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1101 } else { 1102 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1103 } 1104 /* Allow user's customization */ 1105 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1106 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1107 } 1108 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1109 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1110 if (!n_D) { 1111 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1112 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1113 } 1114 /* Set Up KSP for Dirichlet problem of BDDC */ 1115 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1116 /* set ksp_D into pcis data */ 1117 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1118 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1119 pcis->ksp_D = pcbddc->ksp_D; 1120 1121 /* NEUMANN PROBLEM */ 1122 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1123 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1124 if (pcbddc->ksp_R) { /* already created ksp */ 1125 PetscInt nn_R; 1126 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1127 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1128 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1129 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1130 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1131 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1132 reuse = MAT_INITIAL_MATRIX; 1133 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1134 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1135 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1136 reuse = MAT_INITIAL_MATRIX; 1137 } else { /* safe to reuse the matrix */ 1138 reuse = MAT_REUSE_MATRIX; 1139 } 1140 } 1141 /* last check */ 1142 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1143 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1144 reuse = MAT_INITIAL_MATRIX; 1145 } 1146 } else { /* first time, so we need to create the matrix */ 1147 reuse = MAT_INITIAL_MATRIX; 1148 } 1149 /* extract A_RR */ 1150 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1151 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1152 if (ibs != mbs) { 1153 Mat newmat; 1154 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1155 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1156 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1157 } else { 1158 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1159 } 1160 if (!pcbddc->ksp_R) { /* create object if not present */ 1161 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1162 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1163 /* default */ 1164 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1165 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1166 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1167 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1168 if (issbaij) { 1169 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1170 } else { 1171 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1172 } 1173 /* Allow user's customization */ 1174 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1175 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1176 } 1177 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1178 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1179 if (!n_R) { 1180 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1181 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1182 } 1183 /* Set Up KSP for Neumann problem of BDDC */ 1184 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1185 1186 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1187 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1188 /* Dirichlet */ 1189 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1190 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1191 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1192 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1193 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1194 /* need to be adapted? */ 1195 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1196 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1197 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1198 /* print info */ 1199 if (pcbddc->dbg_flag) { 1200 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1201 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1202 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1203 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1204 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1205 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1206 } 1207 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1208 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1209 } 1210 1211 /* Neumann */ 1212 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1213 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1214 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1215 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1216 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1217 /* need to be adapted? */ 1218 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1219 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1220 /* print info */ 1221 if (pcbddc->dbg_flag) { 1222 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1223 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1224 } 1225 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1226 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1227 } 1228 } 1229 /* free Neumann problem's matrix */ 1230 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1236 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1237 { 1238 PetscErrorCode ierr; 1239 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1240 1241 PetscFunctionBegin; 1242 if (applytranspose) { 1243 if (pcbddc->local_auxmat1) { 1244 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1245 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1246 } 1247 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1248 } else { 1249 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1250 if (pcbddc->local_auxmat1) { 1251 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1252 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1253 } 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1261 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1262 { 1263 PetscErrorCode ierr; 1264 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1265 PC_IS* pcis = (PC_IS*) (pc->data); 1266 const PetscScalar zero = 0.0; 1267 1268 PetscFunctionBegin; 1269 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1270 if (applytranspose) { 1271 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1272 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1273 } else { 1274 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1275 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1276 } 1277 /* start communications from local primal nodes to rhs of coarse solver */ 1278 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1279 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1280 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1281 1282 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1283 /* TODO remove null space when doing multilevel */ 1284 if (pcbddc->coarse_ksp) { 1285 if (applytranspose) { 1286 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1287 } else { 1288 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1289 } 1290 } 1291 1292 /* Local solution on R nodes */ 1293 if (pcis->n) { 1294 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1295 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1296 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1297 if (pcbddc->switch_static) { 1298 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1299 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1300 } 1301 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1302 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1303 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1304 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1305 if (pcbddc->switch_static) { 1306 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1307 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1308 } 1309 } 1310 1311 /* communications from coarse sol to local primal nodes */ 1312 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1313 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1314 1315 /* Sum contributions from two levels */ 1316 if (applytranspose) { 1317 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1318 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1319 } else { 1320 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1321 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1329 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1330 { 1331 PetscErrorCode ierr; 1332 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1333 PetscScalar *array,*array2; 1334 Vec from,to; 1335 1336 PetscFunctionBegin; 1337 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1338 from = pcbddc->coarse_vec; 1339 to = pcbddc->vec1_P; 1340 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1341 Vec tvec; 1342 PetscInt lsize; 1343 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1344 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1345 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1346 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1347 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1348 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1349 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1350 } 1351 } else { /* from local to global -> put data in coarse right hand side */ 1352 from = pcbddc->vec1_P; 1353 to = pcbddc->coarse_vec; 1354 } 1355 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1361 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1362 { 1363 PetscErrorCode ierr; 1364 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1365 PetscScalar *array,*array2; 1366 Vec from,to; 1367 1368 PetscFunctionBegin; 1369 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1370 from = pcbddc->coarse_vec; 1371 to = pcbddc->vec1_P; 1372 } else { /* from local to global -> put data in coarse right hand side */ 1373 from = pcbddc->vec1_P; 1374 to = pcbddc->coarse_vec; 1375 } 1376 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1377 if (smode == SCATTER_FORWARD) { 1378 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1379 Vec tvec; 1380 PetscInt lsize; 1381 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1382 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1383 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1384 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1385 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1386 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1387 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1388 } 1389 } 1390 PetscFunctionReturn(0); 1391 } 1392 1393 /* uncomment for testing purposes */ 1394 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1397 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1398 { 1399 PetscErrorCode ierr; 1400 PC_IS* pcis = (PC_IS*)(pc->data); 1401 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1402 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1403 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1404 MatType impMatType=MATSEQAIJ; 1405 /* one and zero */ 1406 PetscScalar one=1.0,zero=0.0; 1407 /* space to store constraints and their local indices */ 1408 PetscScalar *temp_quadrature_constraint; 1409 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1410 /* iterators */ 1411 PetscInt i,j,k,total_counts,temp_start_ptr; 1412 /* stuff to store connected components stored in pcbddc->mat_graph */ 1413 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1414 PetscInt n_ISForFaces,n_ISForEdges; 1415 /* near null space stuff */ 1416 MatNullSpace nearnullsp; 1417 const Vec *nearnullvecs; 1418 Vec *localnearnullsp; 1419 PetscBool nnsp_has_cnst; 1420 PetscInt nnsp_size; 1421 PetscScalar *array; 1422 /* BLAS integers */ 1423 PetscBLASInt lwork,lierr; 1424 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1425 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1426 /* LAPACK working arrays for SVD or POD */ 1427 PetscBool skip_lapack; 1428 PetscScalar *work; 1429 PetscReal *singular_vals; 1430 #if defined(PETSC_USE_COMPLEX) 1431 PetscReal *rwork; 1432 #endif 1433 #if defined(PETSC_MISSING_LAPACK_GESVD) 1434 PetscBLASInt Blas_one_2=1; 1435 PetscScalar *temp_basis,*correlation_mat; 1436 #else 1437 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1438 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1439 #endif 1440 /* reuse */ 1441 PetscInt olocal_primal_size; 1442 PetscInt *oprimal_indices_local_idxs; 1443 /* change of basis */ 1444 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1445 PetscBool boolforchange,qr_needed; 1446 PetscBT touched,change_basis,qr_needed_idx; 1447 /* auxiliary stuff */ 1448 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1449 PetscInt ncc,*gidxs,*permutation,*temp_indices_to_constraint_work; 1450 PetscScalar *temp_quadrature_constraint_work; 1451 /* some quantities */ 1452 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1453 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1454 1455 1456 PetscFunctionBegin; 1457 /* Destroy Mat objects computed previously */ 1458 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1459 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1460 /* Get index sets for faces, edges and vertices from graph */ 1461 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1462 /* free unneeded index sets */ 1463 if (!pcbddc->use_vertices) { 1464 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1465 } 1466 if (!pcbddc->use_edges) { 1467 for (i=0;i<n_ISForEdges;i++) { 1468 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1469 } 1470 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1471 n_ISForEdges = 0; 1472 } 1473 if (!pcbddc->use_faces) { 1474 for (i=0;i<n_ISForFaces;i++) { 1475 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1476 } 1477 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1478 n_ISForFaces = 0; 1479 } 1480 /* HACKS (the following two blocks of code) */ 1481 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1482 pcbddc->use_change_of_basis = PETSC_TRUE; 1483 if (!ISForEdges) { 1484 pcbddc->use_change_on_faces = PETSC_TRUE; 1485 } 1486 } 1487 if (pcbddc->NullSpace) { 1488 /* use_change_of_basis should be consistent among processors */ 1489 PetscBool tbool[2],gbool[2]; 1490 tbool [0] = pcbddc->use_change_of_basis; 1491 tbool [1] = pcbddc->use_change_on_faces; 1492 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1493 pcbddc->use_change_of_basis = gbool[0]; 1494 pcbddc->use_change_on_faces = gbool[1]; 1495 } 1496 /* print some info */ 1497 if (pcbddc->dbg_flag) { 1498 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1499 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1500 i = 0; 1501 if (ISForVertices) { 1502 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1503 } 1504 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1505 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1506 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1507 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1508 } 1509 /* check if near null space is attached to global mat */ 1510 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1511 if (nearnullsp) { 1512 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1513 /* remove any stored info */ 1514 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1515 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1516 /* store information for BDDC solver reuse */ 1517 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1518 pcbddc->onearnullspace = nearnullsp; 1519 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1520 for (i=0;i<nnsp_size;i++) { 1521 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1522 } 1523 } else { /* if near null space is not provided BDDC uses constants by default */ 1524 nnsp_size = 0; 1525 nnsp_has_cnst = PETSC_TRUE; 1526 } 1527 /* get max number of constraints on a single cc */ 1528 max_constraints = nnsp_size; 1529 if (nnsp_has_cnst) max_constraints++; 1530 1531 /* 1532 Evaluate maximum storage size needed by the procedure 1533 - temp_indices will contain start index of each constraint stored as follows 1534 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1535 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1536 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1537 */ 1538 total_counts = n_ISForFaces+n_ISForEdges; 1539 total_counts *= max_constraints; 1540 n_vertices = 0; 1541 if (ISForVertices) { 1542 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1543 } 1544 total_counts += n_vertices; 1545 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1546 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1547 total_counts = 0; 1548 max_size_of_constraint = 0; 1549 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1550 if (i<n_ISForEdges) { 1551 used_IS = &ISForEdges[i]; 1552 } else { 1553 used_IS = &ISForFaces[i-n_ISForEdges]; 1554 } 1555 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1556 total_counts += j; 1557 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1558 } 1559 total_counts *= max_constraints; 1560 total_counts += n_vertices; 1561 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1562 /* get local part of global near null space vectors */ 1563 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1564 for (k=0;k<nnsp_size;k++) { 1565 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1566 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1567 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 } 1569 1570 /* whether or not to skip lapack calls */ 1571 skip_lapack = PETSC_TRUE; 1572 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1573 1574 /* allocate some auxiliary stuff */ 1575 if (!skip_lapack || pcbddc->use_qr_single) { 1576 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1577 } else { 1578 gidxs = NULL; 1579 permutation = NULL; 1580 temp_indices_to_constraint_work = NULL; 1581 temp_quadrature_constraint_work = NULL; 1582 } 1583 1584 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1585 if (!skip_lapack) { 1586 PetscScalar temp_work; 1587 1588 #if defined(PETSC_MISSING_LAPACK_GESVD) 1589 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1590 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1591 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1592 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1593 #if defined(PETSC_USE_COMPLEX) 1594 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1595 #endif 1596 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1597 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1598 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1599 lwork = -1; 1600 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1601 #if !defined(PETSC_USE_COMPLEX) 1602 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1603 #else 1604 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1605 #endif 1606 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1607 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1608 #else /* on missing GESVD */ 1609 /* SVD */ 1610 PetscInt max_n,min_n; 1611 max_n = max_size_of_constraint; 1612 min_n = max_constraints; 1613 if (max_size_of_constraint < max_constraints) { 1614 min_n = max_size_of_constraint; 1615 max_n = max_constraints; 1616 } 1617 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1618 #if defined(PETSC_USE_COMPLEX) 1619 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1620 #endif 1621 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1622 lwork = -1; 1623 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1624 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1625 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1626 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1627 #if !defined(PETSC_USE_COMPLEX) 1628 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1629 #else 1630 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1631 #endif 1632 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1633 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1634 #endif /* on missing GESVD */ 1635 /* Allocate optimal workspace */ 1636 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1637 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1638 } 1639 /* Now we can loop on constraining sets */ 1640 total_counts = 0; 1641 temp_indices[0] = 0; 1642 /* vertices */ 1643 if (ISForVertices) { 1644 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1645 if (nnsp_has_cnst) { /* consider all vertices */ 1646 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1647 for (i=0;i<n_vertices;i++) { 1648 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1649 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1650 total_counts++; 1651 } 1652 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1653 PetscBool used_vertex; 1654 for (i=0;i<n_vertices;i++) { 1655 used_vertex = PETSC_FALSE; 1656 k = 0; 1657 while (!used_vertex && k<nnsp_size) { 1658 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1659 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1660 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1661 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1662 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1663 total_counts++; 1664 used_vertex = PETSC_TRUE; 1665 } 1666 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1667 k++; 1668 } 1669 } 1670 } 1671 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1672 n_vertices = total_counts; 1673 } 1674 1675 /* edges and faces */ 1676 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1677 if (ncc<n_ISForEdges) { 1678 used_IS = &ISForEdges[ncc]; 1679 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1680 } else { 1681 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1682 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1683 } 1684 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1685 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1686 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1687 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1688 /* change of basis should not be performed on local periodic nodes */ 1689 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1690 if (nnsp_has_cnst) { 1691 PetscScalar quad_value; 1692 temp_constraints++; 1693 if (!pcbddc->use_nnsp_true) { 1694 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1695 } else { 1696 quad_value = 1.0; 1697 } 1698 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1699 for (j=0;j<size_of_constraint;j++) { 1700 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1701 } 1702 /* sort by global ordering if using lapack subroutines */ 1703 if (!skip_lapack || pcbddc->use_qr_single) { 1704 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1705 for (j=0;j<size_of_constraint;j++) { 1706 permutation[j]=j; 1707 } 1708 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1709 for (j=0;j<size_of_constraint;j++) { 1710 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1711 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1712 } 1713 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1714 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1715 } 1716 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1717 total_counts++; 1718 } 1719 for (k=0;k<nnsp_size;k++) { 1720 PetscReal real_value; 1721 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1722 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1723 for (j=0;j<size_of_constraint;j++) { 1724 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1725 } 1726 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1727 /* check if array is null on the connected component */ 1728 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1729 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1730 if (real_value > 0.0) { /* keep indices and values */ 1731 /* sort by global ordering if using lapack subroutines */ 1732 if (!skip_lapack || pcbddc->use_qr_single) { 1733 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1734 for (j=0;j<size_of_constraint;j++) { 1735 permutation[j]=j; 1736 } 1737 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1738 for (j=0;j<size_of_constraint;j++) { 1739 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1740 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1741 } 1742 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1743 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1744 } 1745 temp_constraints++; 1746 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1747 total_counts++; 1748 } 1749 } 1750 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1751 valid_constraints = temp_constraints; 1752 if (!pcbddc->use_nnsp_true && temp_constraints) { 1753 if (temp_constraints == 1) { /* just normalize the constraint */ 1754 PetscScalar norm; 1755 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1756 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1757 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1758 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1759 } else { /* perform SVD */ 1760 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1761 1762 #if defined(PETSC_MISSING_LAPACK_GESVD) 1763 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1764 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1765 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1766 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1767 from that computed using LAPACKgesvd 1768 -> This is due to a different computation of eigenvectors in LAPACKheev 1769 -> The quality of the POD-computed basis will be the same */ 1770 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1771 /* Store upper triangular part of correlation matrix */ 1772 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1773 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1774 for (j=0;j<temp_constraints;j++) { 1775 for (k=0;k<j+1;k++) { 1776 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1777 } 1778 } 1779 /* compute eigenvalues and eigenvectors of correlation matrix */ 1780 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1781 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1782 #if !defined(PETSC_USE_COMPLEX) 1783 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1784 #else 1785 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1786 #endif 1787 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1788 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1789 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1790 j = 0; 1791 while (j < temp_constraints && singular_vals[j] < tol) j++; 1792 total_counts = total_counts-j; 1793 valid_constraints = temp_constraints-j; 1794 /* scale and copy POD basis into used quadrature memory */ 1795 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1796 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1797 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1798 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1799 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1800 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1801 if (j<temp_constraints) { 1802 PetscInt ii; 1803 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1804 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1805 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1806 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1807 for (k=0;k<temp_constraints-j;k++) { 1808 for (ii=0;ii<size_of_constraint;ii++) { 1809 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1810 } 1811 } 1812 } 1813 #else /* on missing GESVD */ 1814 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1815 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1817 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1818 #if !defined(PETSC_USE_COMPLEX) 1819 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1820 #else 1821 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1822 #endif 1823 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1824 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1825 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1826 k = temp_constraints; 1827 if (k > size_of_constraint) k = size_of_constraint; 1828 j = 0; 1829 while (j < k && singular_vals[k-j-1] < tol) j++; 1830 valid_constraints = k-j; 1831 total_counts = total_counts-temp_constraints+valid_constraints; 1832 #endif /* on missing GESVD */ 1833 } 1834 } 1835 /* setting change_of_basis flag is safe now */ 1836 if (boolforchange) { 1837 for (j=0;j<valid_constraints;j++) { 1838 PetscBTSet(change_basis,total_counts-j-1); 1839 } 1840 } 1841 } 1842 /* free index sets of faces, edges and vertices */ 1843 for (i=0;i<n_ISForFaces;i++) { 1844 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1845 } 1846 if (n_ISForFaces) { 1847 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1848 } 1849 for (i=0;i<n_ISForEdges;i++) { 1850 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1851 } 1852 if (n_ISForEdges) { 1853 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1854 } 1855 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1856 /* map temp_indices_to_constraint in boundary numbering */ 1857 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1858 if (i != temp_indices[total_counts]) { 1859 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1860 } 1861 1862 /* free workspace */ 1863 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 1864 if (!skip_lapack) { 1865 ierr = PetscFree(work);CHKERRQ(ierr); 1866 #if defined(PETSC_USE_COMPLEX) 1867 ierr = PetscFree(rwork);CHKERRQ(ierr); 1868 #endif 1869 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1870 #if defined(PETSC_MISSING_LAPACK_GESVD) 1871 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1872 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1873 #endif 1874 } 1875 for (k=0;k<nnsp_size;k++) { 1876 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1877 } 1878 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1879 1880 /* set quantities in pcbddc data structure and store previous primal size */ 1881 /* n_vertices defines the number of subdomain corners in the primal space */ 1882 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1883 olocal_primal_size = pcbddc->local_primal_size; 1884 pcbddc->local_primal_size = total_counts; 1885 pcbddc->n_vertices = n_vertices; 1886 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1887 1888 /* Create constraint matrix */ 1889 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1890 /* so we need to set it up properly either with or without change of basis */ 1891 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1892 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1893 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1894 /* array to compute a local numbering of constraints : vertices first then constraints */ 1895 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1896 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1897 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1898 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1899 /* auxiliary stuff for basis change */ 1900 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1901 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1902 1903 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1904 total_primal_vertices=0; 1905 for (i=0;i<pcbddc->local_primal_size;i++) { 1906 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1907 if (size_of_constraint == 1) { 1908 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1909 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1910 aux_primal_minloc[total_primal_vertices]=0; 1911 total_primal_vertices++; 1912 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1913 PetscInt min_loc,min_index; 1914 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1915 /* find first untouched local node */ 1916 k = 0; 1917 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1918 min_index = global_indices[k]; 1919 min_loc = k; 1920 /* search the minimum among global nodes already untouched on the cc */ 1921 for (k=1;k<size_of_constraint;k++) { 1922 /* there can be more than one constraint on a single connected component */ 1923 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1924 min_index = global_indices[k]; 1925 min_loc = k; 1926 } 1927 } 1928 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1929 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1930 aux_primal_minloc[total_primal_vertices]=min_loc; 1931 total_primal_vertices++; 1932 } 1933 } 1934 /* determine if a QR strategy is needed for change of basis */ 1935 qr_needed = PETSC_FALSE; 1936 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1937 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1938 if (PetscBTLookup(change_basis,i)) { 1939 if (!pcbddc->use_qr_single) { 1940 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1941 j = 0; 1942 for (k=0;k<size_of_constraint;k++) { 1943 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1944 j++; 1945 } 1946 } 1947 /* found more than one primal dof on the cc */ 1948 if (j > 1) { 1949 PetscBTSet(qr_needed_idx,i); 1950 qr_needed = PETSC_TRUE; 1951 } 1952 } else { 1953 PetscBTSet(qr_needed_idx,i); 1954 qr_needed = PETSC_TRUE; 1955 } 1956 } 1957 } 1958 /* free workspace */ 1959 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1960 1961 /* permute indices in order to have a sorted set of vertices */ 1962 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1963 1964 /* nonzero structure of constraint matrix */ 1965 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1966 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1967 j=total_primal_vertices; 1968 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1969 if (!PetscBTLookup(change_basis,i)) { 1970 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1971 j++; 1972 } 1973 } 1974 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1975 ierr = PetscFree(nnz);CHKERRQ(ierr); 1976 /* set values in constraint matrix */ 1977 for (i=0;i<total_primal_vertices;i++) { 1978 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1979 } 1980 total_counts = total_primal_vertices; 1981 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1982 if (!PetscBTLookup(change_basis,i)) { 1983 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1984 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1985 total_counts++; 1986 } 1987 } 1988 /* assembling */ 1989 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1990 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1991 /* 1992 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1993 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1994 */ 1995 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1996 if (pcbddc->use_change_of_basis) { 1997 /* dual and primal dofs on a single cc */ 1998 PetscInt dual_dofs,primal_dofs; 1999 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2000 PetscInt primal_counter; 2001 /* working stuff for GEQRF */ 2002 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2003 PetscBLASInt lqr_work; 2004 /* working stuff for UNGQR */ 2005 PetscScalar *gqr_work,lgqr_work_t; 2006 PetscBLASInt lgqr_work; 2007 /* working stuff for TRTRS */ 2008 PetscScalar *trs_rhs; 2009 PetscBLASInt Blas_NRHS; 2010 /* pointers for values insertion into change of basis matrix */ 2011 PetscInt *start_rows,*start_cols; 2012 PetscScalar *start_vals; 2013 /* working stuff for values insertion */ 2014 PetscBT is_primal; 2015 /* matrix sizes */ 2016 PetscInt global_size,local_size; 2017 /* work array for nonzeros */ 2018 PetscScalar *nnz_array; 2019 /* temporary change of basis */ 2020 Mat localChangeOfBasisMatrix; 2021 /* auxiliary work for global change of basis */ 2022 Vec nnz_vec; 2023 PetscInt *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz; 2024 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 2025 PetscScalar *vals; 2026 PetscBool done; 2027 2028 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2029 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2030 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2031 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2032 2033 /* nonzeros for local mat */ 2034 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 2035 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 2036 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2037 if (PetscBTLookup(change_basis,i)) { 2038 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2039 if (PetscBTLookup(qr_needed_idx,i)) { 2040 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2041 } else { 2042 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 2043 /* get local primal index on the cc */ 2044 j = 0; 2045 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2046 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2047 } 2048 } 2049 } 2050 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2051 /* Set initial identity in the matrix */ 2052 for (i=0;i<pcis->n_B;i++) { 2053 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2054 } 2055 2056 if (pcbddc->dbg_flag) { 2057 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2058 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2059 } 2060 2061 2062 /* Now we loop on the constraints which need a change of basis */ 2063 /* 2064 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2065 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2066 2067 Basic blocks of change of basis matrix T computed by 2068 2069 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2070 2071 | 1 0 ... 0 s_1/S | 2072 | 0 1 ... 0 s_2/S | 2073 | ... | 2074 | 0 ... 1 s_{n-1}/S | 2075 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2076 2077 with S = \sum_{i=1}^n s_i^2 2078 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2079 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2080 2081 - QR decomposition of constraints otherwise 2082 */ 2083 if (qr_needed) { 2084 /* space to store Q */ 2085 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2086 /* first we issue queries for optimal work */ 2087 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2088 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2089 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2090 lqr_work = -1; 2091 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2092 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2093 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2094 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2095 lgqr_work = -1; 2096 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2097 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2098 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2099 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2100 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2101 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2102 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2103 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2104 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2105 /* array to store scaling factors for reflectors */ 2106 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2107 /* array to store rhs and solution of triangular solver */ 2108 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2109 /* allocating workspace for check */ 2110 if (pcbddc->dbg_flag) { 2111 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2112 } 2113 } 2114 /* array to store whether a node is primal or not */ 2115 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2116 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2117 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2118 if (i != total_primal_vertices) { 2119 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2120 } 2121 for (i=0;i<total_primal_vertices;i++) { 2122 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2123 } 2124 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2125 2126 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2127 /* -> using implicit ordering contained in temp_indices data */ 2128 total_counts = pcbddc->n_vertices; 2129 primal_counter = total_counts; 2130 while (total_counts<pcbddc->local_primal_size) { 2131 primal_dofs = 1; 2132 if (PetscBTLookup(change_basis,total_counts)) { 2133 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2134 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2135 primal_dofs++; 2136 } 2137 /* get constraint info */ 2138 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2139 dual_dofs = size_of_constraint-primal_dofs; 2140 2141 if (pcbddc->dbg_flag) { 2142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2143 } 2144 2145 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2146 2147 /* copy quadrature constraints for change of basis check */ 2148 if (pcbddc->dbg_flag) { 2149 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2150 } 2151 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2152 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2153 2154 /* compute QR decomposition of constraints */ 2155 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2156 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2157 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2158 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2159 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2160 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2161 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2162 2163 /* explictly compute R^-T */ 2164 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2165 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2166 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2167 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2168 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2169 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2170 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2171 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2172 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2173 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2174 2175 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2176 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2177 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2178 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2179 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2180 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2181 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2182 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2183 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2184 2185 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2186 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2187 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2188 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2189 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2190 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2191 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2192 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2193 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2194 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2195 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2196 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2197 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2198 2199 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2200 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2201 /* insert cols for primal dofs */ 2202 for (j=0;j<primal_dofs;j++) { 2203 start_vals = &qr_basis[j*size_of_constraint]; 2204 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2205 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2206 } 2207 /* insert cols for dual dofs */ 2208 for (j=0,k=0;j<dual_dofs;k++) { 2209 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2210 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2211 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2212 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2213 j++; 2214 } 2215 } 2216 2217 /* check change of basis */ 2218 if (pcbddc->dbg_flag) { 2219 PetscInt ii,jj; 2220 PetscBool valid_qr=PETSC_TRUE; 2221 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2222 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2223 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2224 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2225 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2226 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2227 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2228 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2229 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2230 for (jj=0;jj<size_of_constraint;jj++) { 2231 for (ii=0;ii<primal_dofs;ii++) { 2232 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2233 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2234 } 2235 } 2236 if (!valid_qr) { 2237 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2238 for (jj=0;jj<size_of_constraint;jj++) { 2239 for (ii=0;ii<primal_dofs;ii++) { 2240 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2241 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2242 } 2243 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2244 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2245 } 2246 } 2247 } 2248 } else { 2249 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2250 } 2251 } 2252 } else { /* simple transformation block */ 2253 PetscInt row,col; 2254 PetscScalar val,norm; 2255 2256 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2257 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2258 for (j=0;j<size_of_constraint;j++) { 2259 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2260 if (!PetscBTLookup(is_primal,row)) { 2261 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2262 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2263 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2264 } else { 2265 for (k=0;k<size_of_constraint;k++) { 2266 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2267 if (row != col) { 2268 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2269 } else { 2270 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2271 } 2272 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2273 } 2274 } 2275 } 2276 if (pcbddc->dbg_flag) { 2277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2278 } 2279 } 2280 /* increment primal counter */ 2281 primal_counter += primal_dofs; 2282 } else { 2283 if (pcbddc->dbg_flag) { 2284 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2285 } 2286 } 2287 /* increment constraint counter total_counts */ 2288 total_counts += primal_dofs; 2289 } 2290 2291 /* free workspace */ 2292 if (qr_needed) { 2293 if (pcbddc->dbg_flag) { 2294 ierr = PetscFree(work);CHKERRQ(ierr); 2295 } 2296 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2297 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2298 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2299 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2300 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2301 } 2302 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2303 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2304 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2305 2306 /* assembling of global change of variable */ 2307 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2308 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2309 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2310 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2311 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2312 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2313 2314 /* nonzeros (overestimated) */ 2315 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2316 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2317 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2318 for (i=0;i<pcis->n;i++) { 2319 nnz_array[i] = 1.0; 2320 idxs_all[i] = i; 2321 } 2322 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2323 for (i=0;i<pcis->n_B;i++) { 2324 nnz_array[idxs_B[i]] = nnz[i]; 2325 } 2326 if (pcis->n) { 2327 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2328 } 2329 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2330 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2331 ierr = PetscFree(nnz);CHKERRQ(ierr); 2332 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2333 ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr); 2334 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2335 for (i=0;i<local_size;i++) { 2336 d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size); 2337 o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size); 2338 } 2339 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2340 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2341 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2342 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2343 2344 /* Set identity on dirichlet dofs */ 2345 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2346 for (i=0;i<pcis->n-pcis->n_B;i++) { 2347 PetscScalar one=1.0; 2348 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2349 } 2350 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2351 2352 /* Set values at interface dofs */ 2353 done = PETSC_TRUE; 2354 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2355 if (!done) { 2356 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2357 } 2358 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2359 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2360 ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2361 for (i=0;i<nvtxs;i++) { 2362 PetscInt row,*cols,ncols; 2363 PetscScalar *mat_vals; 2364 2365 row = idxs_B[i]; 2366 ncols = xadj[i+1]-xadj[i]; 2367 cols = idxs_mapped+xadj[i]; 2368 mat_vals = vals+xadj[i]; 2369 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2370 } 2371 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2372 if (!done) { 2373 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2374 } 2375 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2376 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2377 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2378 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2379 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2380 2381 /* check */ 2382 if (pcbddc->dbg_flag) { 2383 PetscReal error; 2384 Vec x,x_change; 2385 2386 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2387 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2388 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2389 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2390 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2391 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2392 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2393 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2394 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2395 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2396 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2397 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2398 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2399 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2400 ierr = VecDestroy(&x);CHKERRQ(ierr); 2401 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2402 } 2403 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2404 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2405 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2406 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2407 } 2408 2409 /* set up change of basis context */ 2410 if (pcbddc->ChangeOfBasisMatrix) { 2411 PCBDDCChange_ctx change_ctx; 2412 2413 if (!pcbddc->new_global_mat) { 2414 PetscInt global_size,local_size; 2415 2416 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2417 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2418 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2419 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2420 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2421 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2422 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2423 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2424 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2425 } else { 2426 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2427 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2428 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2429 } 2430 if (!pcbddc->user_ChangeOfBasisMatrix) { 2431 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2432 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2433 } else { 2434 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2435 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2436 } 2437 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2438 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2439 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2440 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2441 } 2442 2443 /* get indices in local ordering for vertices and constraints */ 2444 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2445 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2446 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2447 } 2448 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2449 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2450 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2451 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2452 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2453 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2454 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2455 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2456 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2457 /* set quantities in PCBDDC data struct */ 2458 pcbddc->n_actual_vertices = i; 2459 /* check if a new primal space has been introduced */ 2460 pcbddc->new_primal_space_local = PETSC_TRUE; 2461 if (olocal_primal_size == pcbddc->local_primal_size) { 2462 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2463 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2464 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2465 } 2466 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2467 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2468 2469 /* flush dbg viewer */ 2470 if (pcbddc->dbg_flag) { 2471 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2472 } 2473 2474 /* free workspace */ 2475 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2476 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2477 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2478 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2479 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2480 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2486 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2487 { 2488 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2489 PC_IS *pcis = (PC_IS*)pc->data; 2490 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2491 PetscInt ierr,i,vertex_size; 2492 PetscViewer viewer=pcbddc->dbg_viewer; 2493 2494 PetscFunctionBegin; 2495 /* Reset previously computed graph */ 2496 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2497 /* Init local Graph struct */ 2498 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2499 2500 /* Check validity of the csr graph passed in by the user */ 2501 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2502 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2503 } 2504 2505 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2506 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2507 Mat mat_adj; 2508 PetscInt *xadj,*adjncy; 2509 PetscInt nvtxs; 2510 PetscBool flg_row=PETSC_TRUE; 2511 2512 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2513 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2514 if (!flg_row) { 2515 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2516 } 2517 if (pcbddc->use_local_adj) { 2518 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2519 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2520 } else { /* just compute subdomain's connected components */ 2521 IS is_dummy; 2522 ISLocalToGlobalMapping l2gmap_dummy; 2523 PetscInt j,sum; 2524 PetscInt *cxadj,*cadjncy; 2525 const PetscInt *idxs; 2526 PCBDDCGraph graph; 2527 PetscBT is_on_boundary; 2528 2529 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2530 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2531 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2532 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2533 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2534 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2535 graph->xadj = xadj; 2536 graph->adjncy = adjncy; 2537 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2538 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2539 2540 if (pcbddc->dbg_flag) { 2541 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2542 for (i=0;i<graph->ncc;i++) { 2543 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2544 } 2545 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2546 } 2547 2548 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2549 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2550 for (i=0;i<pcis->n_B;i++) { 2551 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2552 } 2553 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2554 2555 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2556 sum = 0; 2557 for (i=0;i<graph->ncc;i++) { 2558 PetscInt sizecc = 0; 2559 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2560 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2561 sizecc++; 2562 } 2563 } 2564 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2565 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2566 cxadj[graph->queue[j]] = sizecc; 2567 } 2568 } 2569 sum += sizecc*sizecc; 2570 } 2571 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2572 sum = 0; 2573 for (i=0;i<nvtxs;i++) { 2574 PetscInt temp = cxadj[i]; 2575 cxadj[i] = sum; 2576 sum += temp; 2577 } 2578 cxadj[nvtxs] = sum; 2579 for (i=0;i<graph->ncc;i++) { 2580 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2581 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2582 PetscInt k,sizecc = 0; 2583 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2584 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2585 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2586 sizecc++; 2587 } 2588 } 2589 } 2590 } 2591 } 2592 if (nvtxs) { 2593 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2594 } else { 2595 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2596 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2597 } 2598 graph->xadj = 0; 2599 graph->adjncy = 0; 2600 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2601 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2602 } 2603 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2604 if (!flg_row) { 2605 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2606 } 2607 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2608 } 2609 2610 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2611 vertex_size = 1; 2612 if (pcbddc->user_provided_isfordofs) { 2613 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2614 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2615 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2616 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2617 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2618 } 2619 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2620 pcbddc->n_ISForDofs = 0; 2621 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2622 } 2623 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2624 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2625 } else { 2626 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2627 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2628 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2629 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2630 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2631 } 2632 } 2633 } 2634 2635 /* Setup of Graph */ 2636 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2637 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2638 } 2639 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2640 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2641 } 2642 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2643 2644 /* Graph's connected components analysis */ 2645 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2646 2647 /* print some info to stdout */ 2648 if (pcbddc->dbg_flag) { 2649 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2650 } 2651 2652 /* mark topography has done */ 2653 pcbddc->recompute_topography = PETSC_FALSE; 2654 PetscFunctionReturn(0); 2655 } 2656 2657 #undef __FUNCT__ 2658 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2659 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2660 { 2661 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2662 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2663 PetscErrorCode ierr; 2664 2665 PetscFunctionBegin; 2666 n = 0; 2667 vertices = 0; 2668 if (pcbddc->ConstraintMatrix) { 2669 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2670 for (i=0;i<local_primal_size;i++) { 2671 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2672 if (size_of_constraint == 1) n++; 2673 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2674 } 2675 if (vertices_idx) { 2676 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2677 n = 0; 2678 for (i=0;i<local_primal_size;i++) { 2679 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2680 if (size_of_constraint == 1) { 2681 vertices[n++]=row_cmat_indices[0]; 2682 } 2683 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2684 } 2685 } 2686 } 2687 *n_vertices = n; 2688 if (vertices_idx) *vertices_idx = vertices; 2689 PetscFunctionReturn(0); 2690 } 2691 2692 #undef __FUNCT__ 2693 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2694 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2695 { 2696 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2697 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2698 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2699 PetscBT touched; 2700 PetscErrorCode ierr; 2701 2702 /* This function assumes that the number of local constraints per connected component 2703 is not greater than the number of nodes defined for the connected component 2704 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2705 PetscFunctionBegin; 2706 n = 0; 2707 constraints_index = 0; 2708 if (pcbddc->ConstraintMatrix) { 2709 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2710 max_size_of_constraint = 0; 2711 for (i=0;i<local_primal_size;i++) { 2712 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2713 if (size_of_constraint > 1) { 2714 n++; 2715 } 2716 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2717 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2718 } 2719 if (constraints_idx) { 2720 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2721 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2722 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2723 n = 0; 2724 for (i=0;i<local_primal_size;i++) { 2725 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2726 if (size_of_constraint > 1) { 2727 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2728 /* find first untouched local node */ 2729 j = 0; 2730 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2731 min_index = row_cmat_global_indices[j]; 2732 min_loc = j; 2733 /* search the minimum among nodes not yet touched on the connected component 2734 since there can be more than one constraint on a single cc */ 2735 for (j=1;j<size_of_constraint;j++) { 2736 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2737 min_index = row_cmat_global_indices[j]; 2738 min_loc = j; 2739 } 2740 } 2741 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2742 constraints_index[n++] = row_cmat_indices[min_loc]; 2743 } 2744 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2745 } 2746 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2747 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2748 } 2749 } 2750 *n_constraints = n; 2751 if (constraints_idx) *constraints_idx = constraints_index; 2752 PetscFunctionReturn(0); 2753 } 2754 2755 #undef __FUNCT__ 2756 #define __FUNCT__ "PCBDDCSubsetNumbering" 2757 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2758 { 2759 Vec local_vec,global_vec; 2760 IS seqis,paris; 2761 VecScatter scatter_ctx; 2762 PetscScalar *array; 2763 PetscInt *temp_global_dofs; 2764 PetscScalar globalsum; 2765 PetscInt i,j,s; 2766 PetscInt nlocals,first_index,old_index,max_local,max_global; 2767 PetscMPIInt rank_prec_comm,size_prec_comm; 2768 PetscInt *dof_sizes,*dof_displs; 2769 PetscBool first_found; 2770 PetscErrorCode ierr; 2771 2772 PetscFunctionBegin; 2773 /* mpi buffers */ 2774 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2775 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2776 j = ( !rank_prec_comm ? size_prec_comm : 0); 2777 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 2778 /* get maximum size of subset */ 2779 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2780 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2781 max_local = 0; 2782 for (i=0;i<n_local_dofs;i++) { 2783 if (max_local < temp_global_dofs[i] ) { 2784 max_local = temp_global_dofs[i]; 2785 } 2786 } 2787 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2788 max_global++; 2789 max_local = 0; 2790 for (i=0;i<n_local_dofs;i++) { 2791 if (max_local < local_dofs[i] ) { 2792 max_local = local_dofs[i]; 2793 } 2794 } 2795 max_local++; 2796 /* allocate workspace */ 2797 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2798 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2799 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2800 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2801 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2802 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2803 /* create scatter */ 2804 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2805 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2806 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2807 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2808 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2809 /* init array */ 2810 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2811 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2812 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2813 if (local_dofs_mult) { 2814 for (i=0;i<n_local_dofs;i++) { 2815 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2816 } 2817 } else { 2818 for (i=0;i<n_local_dofs;i++) { 2819 array[local_dofs[i]]=1.0; 2820 } 2821 } 2822 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2823 /* scatter into global vec and get total number of global dofs */ 2824 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2825 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2826 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2827 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2828 /* Fill global_vec with cumulative function for global numbering */ 2829 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2830 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2831 nlocals = 0; 2832 first_index = -1; 2833 first_found = PETSC_FALSE; 2834 for (i=0;i<s;i++) { 2835 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2836 first_found = PETSC_TRUE; 2837 first_index = i; 2838 } 2839 nlocals += (PetscInt)PetscRealPart(array[i]); 2840 } 2841 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2842 if (!rank_prec_comm) { 2843 dof_displs[0]=0; 2844 for (i=1;i<size_prec_comm;i++) { 2845 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2846 } 2847 } 2848 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2849 if (first_found) { 2850 array[first_index] += (PetscScalar)nlocals; 2851 old_index = first_index; 2852 for (i=first_index+1;i<s;i++) { 2853 if (PetscRealPart(array[i]) > 0.1) { 2854 array[i] += array[old_index]; 2855 old_index = i; 2856 } 2857 } 2858 } 2859 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2860 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2861 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2862 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2863 /* get global ordering of local dofs */ 2864 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2865 if (local_dofs_mult) { 2866 for (i=0;i<n_local_dofs;i++) { 2867 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2868 } 2869 } else { 2870 for (i=0;i<n_local_dofs;i++) { 2871 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2872 } 2873 } 2874 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2875 /* free workspace */ 2876 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2877 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2878 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2879 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 2880 /* return pointer to global ordering of local dofs */ 2881 *global_numbering_subset = temp_global_dofs; 2882 PetscFunctionReturn(0); 2883 } 2884 2885 #undef __FUNCT__ 2886 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2887 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2888 { 2889 PetscInt i,j; 2890 PetscScalar *alphas; 2891 PetscErrorCode ierr; 2892 2893 PetscFunctionBegin; 2894 /* this implements stabilized Gram-Schmidt */ 2895 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2896 for (i=0;i<n;i++) { 2897 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2898 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2899 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2900 } 2901 ierr = PetscFree(alphas);CHKERRQ(ierr); 2902 PetscFunctionReturn(0); 2903 } 2904 2905 #undef __FUNCT__ 2906 #define __FUNCT__ "MatISGetSubassemblingPattern" 2907 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2908 { 2909 Mat subdomain_adj; 2910 IS new_ranks,ranks_send_to; 2911 MatPartitioning partitioner; 2912 Mat_IS *matis; 2913 PetscInt n_neighs,*neighs,*n_shared,**shared; 2914 PetscInt prank; 2915 PetscMPIInt size,rank,color; 2916 PetscInt *xadj,*adjncy,*oldranks; 2917 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2918 PetscInt i,local_size,threshold=0; 2919 PetscErrorCode ierr; 2920 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2921 PetscSubcomm subcomm; 2922 2923 PetscFunctionBegin; 2924 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2925 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2926 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2927 2928 /* Get info on mapping */ 2929 matis = (Mat_IS*)(mat->data); 2930 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2931 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2932 2933 /* build local CSR graph of subdomains' connectivity */ 2934 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2935 xadj[0] = 0; 2936 xadj[1] = PetscMax(n_neighs-1,0); 2937 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2938 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2939 2940 if (threshold) { 2941 PetscInt xadj_count = 0; 2942 for (i=1;i<n_neighs;i++) { 2943 if (n_shared[i] > threshold) { 2944 adjncy[xadj_count] = neighs[i]; 2945 adjncy_wgt[xadj_count] = n_shared[i]; 2946 xadj_count++; 2947 } 2948 } 2949 xadj[1] = xadj_count; 2950 } else { 2951 if (xadj[1]) { 2952 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2953 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2954 } 2955 } 2956 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2957 if (use_square) { 2958 for (i=0;i<xadj[1];i++) { 2959 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2960 } 2961 } 2962 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2963 2964 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 2965 2966 /* 2967 Restrict work on active processes only. 2968 */ 2969 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2970 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2971 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2972 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2973 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2974 if (color) { 2975 ierr = PetscFree(xadj);CHKERRQ(ierr); 2976 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2977 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2978 } else { 2979 PetscInt coarsening_ratio; 2980 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 2981 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2982 prank = rank; 2983 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 2984 /* 2985 for (i=0;i<size;i++) { 2986 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2987 } 2988 */ 2989 for (i=0;i<xadj[1];i++) { 2990 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2991 } 2992 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2993 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2994 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2995 2996 /* Partition */ 2997 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 2998 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2999 if (use_vwgt) { 3000 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3001 v_wgt[0] = local_size; 3002 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3003 } 3004 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3005 coarsening_ratio = size/n_subdomains; 3006 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3007 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3008 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3009 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3010 3011 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3012 if (contiguous) { 3013 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3014 } else { 3015 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3016 } 3017 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3018 /* clean up */ 3019 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3020 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3021 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3022 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3023 } 3024 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3025 3026 /* assemble parallel IS for sends */ 3027 i = 1; 3028 if (color) i=0; 3029 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3030 3031 /* get back IS */ 3032 *is_sends = ranks_send_to; 3033 PetscFunctionReturn(0); 3034 } 3035 3036 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3037 3038 #undef __FUNCT__ 3039 #define __FUNCT__ "MatISSubassemble" 3040 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3041 { 3042 Mat local_mat; 3043 Mat_IS *matis; 3044 IS is_sends_internal; 3045 PetscInt rows,cols,new_local_rows; 3046 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3047 PetscBool ismatis,isdense,newisdense,destroy_mat; 3048 ISLocalToGlobalMapping l2gmap; 3049 PetscInt* l2gmap_indices; 3050 const PetscInt* is_indices; 3051 MatType new_local_type; 3052 /* buffers */ 3053 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3054 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3055 PetscInt *recv_buffer_idxs_local; 3056 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3057 /* MPI */ 3058 MPI_Comm comm,comm_n; 3059 PetscSubcomm subcomm; 3060 PetscMPIInt n_sends,n_recvs,commsize; 3061 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3062 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3063 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3064 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3065 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3066 PetscErrorCode ierr; 3067 3068 PetscFunctionBegin; 3069 /* TODO: add missing checks */ 3070 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3071 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3072 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3073 PetscValidLogicalCollectiveInt(mat,nis,7); 3074 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3075 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3076 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3077 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3078 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3079 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3080 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3081 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3082 PetscInt mrows,mcols,mnrows,mncols; 3083 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3084 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3085 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3086 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3087 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3088 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3089 } 3090 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3091 PetscValidLogicalCollectiveInt(mat,bs,0); 3092 /* prepare IS for sending if not provided */ 3093 if (!is_sends) { 3094 PetscBool pcontig = PETSC_TRUE; 3095 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3096 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3097 } else { 3098 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3099 is_sends_internal = is_sends; 3100 } 3101 3102 /* get pointer of MATIS data */ 3103 matis = (Mat_IS*)mat->data; 3104 3105 /* get comm */ 3106 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3107 3108 /* compute number of sends */ 3109 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3110 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3111 3112 /* compute number of receives */ 3113 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3114 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3115 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3116 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3117 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3118 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3119 ierr = PetscFree(iflags);CHKERRQ(ierr); 3120 3121 /* restrict comm if requested */ 3122 subcomm = 0; 3123 destroy_mat = PETSC_FALSE; 3124 if (restrict_comm) { 3125 PetscMPIInt color,rank,subcommsize; 3126 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3127 color = 0; 3128 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3129 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3130 subcommsize = commsize - subcommsize; 3131 /* check if reuse has been requested */ 3132 if (reuse == MAT_REUSE_MATRIX) { 3133 if (*mat_n) { 3134 PetscMPIInt subcommsize2; 3135 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3136 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3137 comm_n = PetscObjectComm((PetscObject)*mat_n); 3138 } else { 3139 comm_n = PETSC_COMM_SELF; 3140 } 3141 } else { /* MAT_INITIAL_MATRIX */ 3142 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3143 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3144 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3145 comm_n = PetscSubcommChild(subcomm); 3146 } 3147 /* flag to destroy *mat_n if not significative */ 3148 if (color) destroy_mat = PETSC_TRUE; 3149 } else { 3150 comm_n = comm; 3151 } 3152 3153 /* prepare send/receive buffers */ 3154 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3155 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3156 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3157 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3158 if (nis) { 3159 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3160 } 3161 3162 /* Get data from local matrices */ 3163 if (!isdense) { 3164 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3165 /* TODO: See below some guidelines on how to prepare the local buffers */ 3166 /* 3167 send_buffer_vals should contain the raw values of the local matrix 3168 send_buffer_idxs should contain: 3169 - MatType_PRIVATE type 3170 - PetscInt size_of_l2gmap 3171 - PetscInt global_row_indices[size_of_l2gmap] 3172 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3173 */ 3174 } else { 3175 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3176 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3177 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3178 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3179 send_buffer_idxs[1] = i; 3180 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3181 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3182 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3183 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3184 for (i=0;i<n_sends;i++) { 3185 ilengths_vals[is_indices[i]] = len*len; 3186 ilengths_idxs[is_indices[i]] = len+2; 3187 } 3188 } 3189 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3190 /* additional is (if any) */ 3191 if (nis) { 3192 PetscMPIInt psum; 3193 PetscInt j; 3194 for (j=0,psum=0;j<nis;j++) { 3195 PetscInt plen; 3196 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3197 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3198 psum += len+1; /* indices + lenght */ 3199 } 3200 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3201 for (j=0,psum=0;j<nis;j++) { 3202 PetscInt plen; 3203 const PetscInt *is_array_idxs; 3204 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3205 send_buffer_idxs_is[psum] = plen; 3206 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3207 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3208 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3209 psum += plen+1; /* indices + lenght */ 3210 } 3211 for (i=0;i<n_sends;i++) { 3212 ilengths_idxs_is[is_indices[i]] = psum; 3213 } 3214 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3215 } 3216 3217 buf_size_idxs = 0; 3218 buf_size_vals = 0; 3219 buf_size_idxs_is = 0; 3220 for (i=0;i<n_recvs;i++) { 3221 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3222 buf_size_vals += (PetscInt)olengths_vals[i]; 3223 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3224 } 3225 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3226 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3227 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3228 3229 /* get new tags for clean communications */ 3230 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3231 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3232 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3233 3234 /* allocate for requests */ 3235 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3236 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3237 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3238 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3239 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3240 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3241 3242 /* communications */ 3243 ptr_idxs = recv_buffer_idxs; 3244 ptr_vals = recv_buffer_vals; 3245 ptr_idxs_is = recv_buffer_idxs_is; 3246 for (i=0;i<n_recvs;i++) { 3247 source_dest = onodes[i]; 3248 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3249 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3250 ptr_idxs += olengths_idxs[i]; 3251 ptr_vals += olengths_vals[i]; 3252 if (nis) { 3253 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3254 ptr_idxs_is += olengths_idxs_is[i]; 3255 } 3256 } 3257 for (i=0;i<n_sends;i++) { 3258 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3259 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3260 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3261 if (nis) { 3262 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3263 } 3264 } 3265 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3266 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3267 3268 /* assemble new l2g map */ 3269 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3270 ptr_idxs = recv_buffer_idxs; 3271 new_local_rows = 0; 3272 for (i=0;i<n_recvs;i++) { 3273 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3274 ptr_idxs += olengths_idxs[i]; 3275 } 3276 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3277 ptr_idxs = recv_buffer_idxs; 3278 new_local_rows = 0; 3279 for (i=0;i<n_recvs;i++) { 3280 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3281 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3282 ptr_idxs += olengths_idxs[i]; 3283 } 3284 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3285 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3286 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3287 3288 /* infer new local matrix type from received local matrices type */ 3289 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3290 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3291 if (n_recvs) { 3292 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3293 ptr_idxs = recv_buffer_idxs; 3294 for (i=0;i<n_recvs;i++) { 3295 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3296 new_local_type_private = MATAIJ_PRIVATE; 3297 break; 3298 } 3299 ptr_idxs += olengths_idxs[i]; 3300 } 3301 switch (new_local_type_private) { 3302 case MATDENSE_PRIVATE: 3303 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3304 new_local_type = MATSEQAIJ; 3305 bs = 1; 3306 } else { /* if I receive only 1 dense matrix */ 3307 new_local_type = MATSEQDENSE; 3308 bs = 1; 3309 } 3310 break; 3311 case MATAIJ_PRIVATE: 3312 new_local_type = MATSEQAIJ; 3313 bs = 1; 3314 break; 3315 case MATBAIJ_PRIVATE: 3316 new_local_type = MATSEQBAIJ; 3317 break; 3318 case MATSBAIJ_PRIVATE: 3319 new_local_type = MATSEQSBAIJ; 3320 break; 3321 default: 3322 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3323 break; 3324 } 3325 } else { /* by default, new_local_type is seqdense */ 3326 new_local_type = MATSEQDENSE; 3327 bs = 1; 3328 } 3329 3330 /* create MATIS object if needed */ 3331 if (reuse == MAT_INITIAL_MATRIX) { 3332 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3333 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3334 } else { 3335 /* it also destroys the local matrices */ 3336 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3337 } 3338 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3339 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3340 3341 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3342 3343 /* Global to local map of received indices */ 3344 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3345 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3346 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3347 3348 /* restore attributes -> type of incoming data and its size */ 3349 buf_size_idxs = 0; 3350 for (i=0;i<n_recvs;i++) { 3351 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3352 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3353 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3354 } 3355 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3356 3357 /* set preallocation */ 3358 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3359 if (!newisdense) { 3360 PetscInt *new_local_nnz=0; 3361 3362 ptr_vals = recv_buffer_vals; 3363 ptr_idxs = recv_buffer_idxs_local; 3364 if (n_recvs) { 3365 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3366 } 3367 for (i=0;i<n_recvs;i++) { 3368 PetscInt j; 3369 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3370 for (j=0;j<*(ptr_idxs+1);j++) { 3371 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3372 } 3373 } else { 3374 /* TODO */ 3375 } 3376 ptr_idxs += olengths_idxs[i]; 3377 } 3378 if (new_local_nnz) { 3379 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3380 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3381 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3382 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3383 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3384 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3385 } else { 3386 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3387 } 3388 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3389 } else { 3390 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3391 } 3392 3393 /* set values */ 3394 ptr_vals = recv_buffer_vals; 3395 ptr_idxs = recv_buffer_idxs_local; 3396 for (i=0;i<n_recvs;i++) { 3397 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3398 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3399 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3400 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3401 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3402 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3403 } else { 3404 /* TODO */ 3405 } 3406 ptr_idxs += olengths_idxs[i]; 3407 ptr_vals += olengths_vals[i]; 3408 } 3409 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3410 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3411 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3412 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3413 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3414 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3415 3416 #if 0 3417 if (!restrict_comm) { /* check */ 3418 Vec lvec,rvec; 3419 PetscReal infty_error; 3420 3421 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3422 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3423 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3424 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3425 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3426 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3427 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3428 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3429 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3430 } 3431 #endif 3432 3433 /* assemble new additional is (if any) */ 3434 if (nis) { 3435 PetscInt **temp_idxs,*count_is,j,psum; 3436 3437 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3438 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3439 ptr_idxs = recv_buffer_idxs_is; 3440 psum = 0; 3441 for (i=0;i<n_recvs;i++) { 3442 for (j=0;j<nis;j++) { 3443 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3444 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3445 psum += plen; 3446 ptr_idxs += plen+1; /* shift pointer to received data */ 3447 } 3448 } 3449 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3450 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3451 for (i=1;i<nis;i++) { 3452 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3453 } 3454 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3455 ptr_idxs = recv_buffer_idxs_is; 3456 for (i=0;i<n_recvs;i++) { 3457 for (j=0;j<nis;j++) { 3458 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3459 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3460 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3461 ptr_idxs += plen+1; /* shift pointer to received data */ 3462 } 3463 } 3464 for (i=0;i<nis;i++) { 3465 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3466 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3467 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3468 } 3469 ierr = PetscFree(count_is);CHKERRQ(ierr); 3470 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3471 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3472 } 3473 /* free workspace */ 3474 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3475 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3476 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3477 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3478 if (isdense) { 3479 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3480 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3481 } else { 3482 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3483 } 3484 if (nis) { 3485 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3486 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3487 } 3488 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3489 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3490 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3491 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3492 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3493 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3494 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3495 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3496 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3497 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3498 ierr = PetscFree(onodes);CHKERRQ(ierr); 3499 if (nis) { 3500 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3501 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3502 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3503 } 3504 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3505 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3506 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3507 for (i=0;i<nis;i++) { 3508 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3509 } 3510 } 3511 PetscFunctionReturn(0); 3512 } 3513 3514 /* temporary hack into ksp private data structure */ 3515 #include <petsc-private/kspimpl.h> 3516 3517 #undef __FUNCT__ 3518 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3519 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3520 { 3521 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3522 PC_IS *pcis = (PC_IS*)pc->data; 3523 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3524 MatNullSpace CoarseNullSpace=NULL; 3525 ISLocalToGlobalMapping coarse_islg; 3526 IS coarse_is,*isarray; 3527 PetscInt i,im_active=-1,active_procs=-1; 3528 PetscInt nis,nisdofs,nisneu; 3529 PC pc_temp; 3530 PCType coarse_pc_type; 3531 KSPType coarse_ksp_type; 3532 PetscBool multilevel_requested,multilevel_allowed; 3533 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3534 Mat t_coarse_mat_is; 3535 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3536 PetscMPIInt all_procs; 3537 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3538 PetscBool compute_vecs = PETSC_FALSE; 3539 PetscScalar *array; 3540 PetscErrorCode ierr; 3541 3542 PetscFunctionBegin; 3543 /* Assign global numbering to coarse dofs */ 3544 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3545 PetscInt ocoarse_size; 3546 compute_vecs = PETSC_TRUE; 3547 ocoarse_size = pcbddc->coarse_size; 3548 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3549 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3550 /* see if we can avoid some work */ 3551 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3552 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3553 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3554 coarse_reuse = PETSC_FALSE; 3555 } else { /* we can safely reuse already computed coarse matrix */ 3556 coarse_reuse = PETSC_TRUE; 3557 } 3558 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3559 coarse_reuse = PETSC_FALSE; 3560 } 3561 /* reset any subassembling information */ 3562 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3563 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3564 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3565 coarse_reuse = PETSC_TRUE; 3566 } 3567 3568 /* count "active" (i.e. with positive local size) and "void" processes */ 3569 im_active = !!(pcis->n); 3570 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3571 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3572 void_procs = all_procs-active_procs; 3573 csin_type_simple = PETSC_TRUE; 3574 redist = PETSC_FALSE; 3575 if (pcbddc->current_level && void_procs) { 3576 csin_ml = PETSC_TRUE; 3577 ncoarse_ml = void_procs; 3578 csin_ds = PETSC_TRUE; 3579 ncoarse_ds = void_procs; 3580 } else { 3581 csin_ml = PETSC_FALSE; 3582 ncoarse_ml = all_procs; 3583 if (void_procs) { 3584 csin_ds = PETSC_TRUE; 3585 ncoarse_ds = void_procs; 3586 csin_type_simple = PETSC_FALSE; 3587 } else { 3588 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3589 csin_ds = PETSC_TRUE; 3590 ncoarse_ds = pcbddc->redistribute_coarse; 3591 redist = PETSC_TRUE; 3592 } else { 3593 csin_ds = PETSC_FALSE; 3594 ncoarse_ds = all_procs; 3595 } 3596 } 3597 } 3598 3599 /* 3600 test if we can go multilevel: three conditions must be satisfied: 3601 - we have not exceeded the number of levels requested 3602 - we can actually subassemble the active processes 3603 - we can find a suitable number of MPI processes where we can place the subassembled problem 3604 */ 3605 multilevel_allowed = PETSC_FALSE; 3606 multilevel_requested = PETSC_FALSE; 3607 if (pcbddc->current_level < pcbddc->max_levels) { 3608 multilevel_requested = PETSC_TRUE; 3609 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3610 multilevel_allowed = PETSC_FALSE; 3611 } else { 3612 multilevel_allowed = PETSC_TRUE; 3613 } 3614 } 3615 /* determine number of process partecipating to coarse solver */ 3616 if (multilevel_allowed) { 3617 ncoarse = ncoarse_ml; 3618 csin = csin_ml; 3619 } else { 3620 ncoarse = ncoarse_ds; 3621 csin = csin_ds; 3622 } 3623 3624 /* creates temporary l2gmap and IS for coarse indexes */ 3625 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3626 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3627 3628 /* creates temporary MATIS object for coarse matrix */ 3629 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3630 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3631 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3632 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3633 #if 0 3634 { 3635 PetscViewer viewer; 3636 char filename[256]; 3637 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3638 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3639 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3640 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3641 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3642 } 3643 #endif 3644 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3645 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3646 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3647 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3648 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3649 3650 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3651 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3652 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3653 const PetscInt *idxs; 3654 ISLocalToGlobalMapping tmap; 3655 3656 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3657 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3658 /* allocate space for temporary storage */ 3659 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3660 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3661 /* allocate for IS array */ 3662 nisdofs = pcbddc->n_ISForDofsLocal; 3663 nisneu = !!pcbddc->NeumannBoundariesLocal; 3664 nis = nisdofs + nisneu; 3665 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3666 /* dofs splitting */ 3667 for (i=0;i<nisdofs;i++) { 3668 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3669 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3670 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3671 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3672 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3673 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3674 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3675 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3676 } 3677 /* neumann boundaries */ 3678 if (pcbddc->NeumannBoundariesLocal) { 3679 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3680 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3681 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3682 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3683 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3684 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3685 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3686 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3687 } 3688 /* free memory */ 3689 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3690 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3691 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3692 } else { 3693 nis = 0; 3694 nisdofs = 0; 3695 nisneu = 0; 3696 isarray = NULL; 3697 } 3698 /* destroy no longer needed map */ 3699 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3700 3701 /* restrict on coarse candidates (if needed) */ 3702 coarse_mat_is = NULL; 3703 if (csin) { 3704 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3705 if (redist) { 3706 PetscMPIInt rank; 3707 PetscInt spc,n_spc_p1,dest[1]; 3708 3709 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3710 spc = all_procs/pcbddc->redistribute_coarse; 3711 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 3712 if (rank > n_spc_p1*(spc+1)-1) { 3713 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3714 } else { 3715 dest[0] = rank/(spc+1); 3716 } 3717 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3718 } else { 3719 PetscInt j,tissize,*nisindices; 3720 PetscInt *coarse_candidates; 3721 const PetscInt* tisindices; 3722 /* get coarse candidates' ranks in pc communicator */ 3723 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 3724 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3725 for (i=0,j=0;i<all_procs;i++) { 3726 if (!coarse_candidates[i]) { 3727 coarse_candidates[j]=i; 3728 j++; 3729 } 3730 } 3731 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3732 /* get a suitable subassembling pattern */ 3733 if (csin_type_simple) { 3734 PetscMPIInt rank; 3735 PetscInt issize,isidx; 3736 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3737 if (im_active) { 3738 issize = 1; 3739 isidx = (PetscInt)rank; 3740 } else { 3741 issize = 0; 3742 isidx = -1; 3743 } 3744 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3745 } else { 3746 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3747 } 3748 if (pcbddc->dbg_flag) { 3749 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3750 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3751 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3752 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3753 for (i=0;i<j;i++) { 3754 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3755 } 3756 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3757 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3758 } 3759 /* shift the pattern on coarse candidates */ 3760 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3761 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3762 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 3763 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3764 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3765 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3766 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3767 } 3768 } 3769 if (pcbddc->dbg_flag) { 3770 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3771 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3772 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3773 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3774 } 3775 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3776 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3777 } else { 3778 if (pcbddc->dbg_flag) { 3779 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3780 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3781 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3782 } 3783 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3784 coarse_mat_is = t_coarse_mat_is; 3785 } 3786 3787 /* create local to global scatters for coarse problem */ 3788 if (compute_vecs) { 3789 PetscInt lrows; 3790 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3791 if (coarse_mat_is) { 3792 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3793 } else { 3794 lrows = 0; 3795 } 3796 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3797 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3798 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3799 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3800 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3801 } 3802 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3803 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3804 3805 /* set defaults for coarse KSP and PC */ 3806 if (multilevel_allowed) { 3807 coarse_ksp_type = KSPRICHARDSON; 3808 coarse_pc_type = PCBDDC; 3809 } else { 3810 coarse_ksp_type = KSPPREONLY; 3811 coarse_pc_type = PCREDUNDANT; 3812 } 3813 3814 /* print some info if requested */ 3815 if (pcbddc->dbg_flag) { 3816 if (!multilevel_allowed) { 3817 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3818 if (multilevel_requested) { 3819 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3820 } else if (pcbddc->max_levels) { 3821 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3822 } 3823 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3824 } 3825 } 3826 3827 /* create the coarse KSP object only once with defaults */ 3828 if (coarse_mat_is) { 3829 MatReuse coarse_mat_reuse; 3830 PetscViewer dbg_viewer = NULL; 3831 if (pcbddc->dbg_flag) { 3832 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3833 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3834 } 3835 if (!pcbddc->coarse_ksp) { 3836 char prefix[256],str_level[16]; 3837 size_t len; 3838 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3839 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3840 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3841 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3842 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3843 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3844 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3845 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3846 /* prefix */ 3847 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3848 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3849 if (!pcbddc->current_level) { 3850 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3851 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3852 } else { 3853 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3854 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3855 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3856 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3857 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3858 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3859 } 3860 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3861 /* allow user customization */ 3862 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3863 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3864 } 3865 3866 /* get some info after set from options */ 3867 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3868 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3869 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3870 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3871 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3872 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3873 isbddc = PETSC_FALSE; 3874 } 3875 if (isredundant) { 3876 KSP inner_ksp; 3877 PC inner_pc; 3878 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3879 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3880 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3881 } 3882 3883 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3884 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3885 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3886 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3887 if (nisdofs) { 3888 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3889 for (i=0;i<nisdofs;i++) { 3890 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3891 } 3892 } 3893 if (nisneu) { 3894 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3895 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3896 } 3897 3898 /* assemble coarse matrix */ 3899 if (coarse_reuse) { 3900 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3901 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3902 coarse_mat_reuse = MAT_REUSE_MATRIX; 3903 } else { 3904 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3905 } 3906 if (isbddc || isnn) { 3907 if (pcbddc->coarsening_ratio > 1) { 3908 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3909 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3910 if (pcbddc->dbg_flag) { 3911 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3912 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3913 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3914 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3915 } 3916 } 3917 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3918 } else { 3919 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 3920 coarse_mat = coarse_mat_is; 3921 } 3922 } else { 3923 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3924 } 3925 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3926 3927 /* propagate symmetry info to coarse matrix */ 3928 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3929 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3930 3931 /* set operators */ 3932 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3933 if (pcbddc->dbg_flag) { 3934 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3935 } 3936 } else { /* processes non partecipating to coarse solver (if any) */ 3937 coarse_mat = 0; 3938 } 3939 ierr = PetscFree(isarray);CHKERRQ(ierr); 3940 #if 0 3941 { 3942 PetscViewer viewer; 3943 char filename[256]; 3944 sprintf(filename,"coarse_mat.m"); 3945 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3946 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3947 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3948 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3949 } 3950 #endif 3951 3952 /* Compute coarse null space (special handling by BDDC only) */ 3953 if (pcbddc->NullSpace) { 3954 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3955 } 3956 3957 if (pcbddc->coarse_ksp) { 3958 Vec crhs,csol; 3959 PetscBool ispreonly; 3960 if (CoarseNullSpace) { 3961 if (isbddc) { 3962 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3963 } else { 3964 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3965 } 3966 } 3967 /* setup coarse ksp */ 3968 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3969 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3970 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3971 /* hack */ 3972 if (!csol) { 3973 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3974 } 3975 if (!crhs) { 3976 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3977 } 3978 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3979 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3980 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3981 KSP check_ksp; 3982 KSPType check_ksp_type; 3983 PC check_pc; 3984 Vec check_vec,coarse_vec; 3985 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3986 PetscInt its; 3987 PetscBool compute_eigs; 3988 PetscReal *eigs_r,*eigs_c; 3989 PetscInt neigs; 3990 const char *prefix; 3991 3992 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3993 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3994 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3995 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3996 if (ispreonly) { 3997 check_ksp_type = KSPPREONLY; 3998 compute_eigs = PETSC_FALSE; 3999 } else { 4000 check_ksp_type = KSPGMRES; 4001 compute_eigs = PETSC_TRUE; 4002 } 4003 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4004 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4005 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4006 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4007 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4008 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4009 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4010 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4011 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4012 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4013 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4014 /* create random vec */ 4015 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4016 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4017 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4018 if (CoarseNullSpace) { 4019 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4020 } 4021 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4022 /* solve coarse problem */ 4023 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4024 if (CoarseNullSpace) { 4025 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4026 } 4027 /* set eigenvalue estimation if preonly has not been requested */ 4028 if (compute_eigs) { 4029 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4030 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4031 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4032 lambda_max = eigs_r[neigs-1]; 4033 lambda_min = eigs_r[0]; 4034 if (pcbddc->use_coarse_estimates) { 4035 if (lambda_max>lambda_min) { 4036 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4037 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4038 } 4039 } 4040 } 4041 4042 /* check coarse problem residual error */ 4043 if (pcbddc->dbg_flag) { 4044 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4045 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4046 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4047 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4048 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4049 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4050 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4051 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4052 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4053 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4054 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4055 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4056 if (compute_eigs) { 4057 PetscReal lambda_max_s,lambda_min_s; 4058 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4059 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4060 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4061 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4062 for (i=0;i<neigs;i++) { 4063 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4064 } 4065 } 4066 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4067 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4068 } 4069 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4070 if (compute_eigs) { 4071 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4072 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4073 } 4074 } 4075 } 4076 /* print additional info */ 4077 if (pcbddc->dbg_flag) { 4078 /* waits until all processes reaches this point */ 4079 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4080 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4081 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4082 } 4083 4084 /* free memory */ 4085 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4086 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4087 PetscFunctionReturn(0); 4088 } 4089 4090 #undef __FUNCT__ 4091 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4092 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4093 { 4094 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4095 PC_IS* pcis = (PC_IS*)pc->data; 4096 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4097 PetscInt i,coarse_size; 4098 PetscInt *local_primal_indices; 4099 PetscErrorCode ierr; 4100 4101 PetscFunctionBegin; 4102 /* Compute global number of coarse dofs */ 4103 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4104 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4105 } 4106 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4107 4108 /* check numbering */ 4109 if (pcbddc->dbg_flag) { 4110 PetscScalar coarsesum,*array; 4111 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4112 4113 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4114 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4115 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4116 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4117 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4118 for (i=0;i<pcbddc->local_primal_size;i++) { 4119 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4120 } 4121 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4122 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4123 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4124 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4125 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4126 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4127 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4128 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4129 for (i=0;i<pcis->n;i++) { 4130 if (array[i] == 1.0) { 4131 set_error = PETSC_TRUE; 4132 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4133 } 4134 } 4135 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4136 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4137 for (i=0;i<pcis->n;i++) { 4138 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4139 } 4140 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4141 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4142 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4143 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4144 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4146 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4147 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4148 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4149 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4150 for (i=0;i<pcbddc->local_primal_size;i++) { 4151 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4152 } 4153 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4154 } 4155 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4156 if (set_error_reduced) { 4157 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4158 } 4159 } 4160 /* get back data */ 4161 *coarse_size_n = coarse_size; 4162 *local_primal_indices_n = local_primal_indices; 4163 PetscFunctionReturn(0); 4164 } 4165 4166 #undef __FUNCT__ 4167 #define __FUNCT__ "PCBDDCGlobalToLocal" 4168 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4169 { 4170 IS localis_t; 4171 PetscInt i,lsize,*idxs,n; 4172 PetscScalar *vals; 4173 PetscErrorCode ierr; 4174 4175 PetscFunctionBegin; 4176 /* get indices in local ordering exploiting local to global map */ 4177 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4178 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4179 for (i=0;i<lsize;i++) vals[i] = 1.0; 4180 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4181 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4182 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4183 if (idxs) { /* multilevel guard */ 4184 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4185 } 4186 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4187 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4188 ierr = PetscFree(vals);CHKERRQ(ierr); 4189 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4190 /* now compute set in local ordering */ 4191 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4192 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4193 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4194 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4195 for (i=0,lsize=0;i<n;i++) { 4196 if (PetscRealPart(vals[i]) > 0.5) { 4197 lsize++; 4198 } 4199 } 4200 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4201 for (i=0,lsize=0;i<n;i++) { 4202 if (PetscRealPart(vals[i]) > 0.5) { 4203 idxs[lsize++] = i; 4204 } 4205 } 4206 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4207 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4208 *localis = localis_t; 4209 PetscFunctionReturn(0); 4210 } 4211 4212 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4213 #undef __FUNCT__ 4214 #define __FUNCT__ "PCBDDCMatMult_Private" 4215 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4216 { 4217 PCBDDCChange_ctx change_ctx; 4218 PetscErrorCode ierr; 4219 4220 PetscFunctionBegin; 4221 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4222 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4223 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4224 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4225 PetscFunctionReturn(0); 4226 } 4227 4228 #undef __FUNCT__ 4229 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4230 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4231 { 4232 PCBDDCChange_ctx change_ctx; 4233 PetscErrorCode ierr; 4234 4235 PetscFunctionBegin; 4236 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4237 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4238 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4239 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4240 PetscFunctionReturn(0); 4241 } 4242