1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCSetUpSolvers" 10 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscScalar *coarse_submat_vals; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 /* Compute matrix after change of basis and extract local submatrices */ 18 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D */ 21 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 22 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 23 24 /* Setup local solvers ksp_D and ksp_R */ 25 /* PCBDDCSetUpLocalScatters should be called first! */ 26 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 27 28 /* Change global null space passed in by the user if change of basis has been requested */ 29 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 30 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 31 } 32 33 /* 34 Setup local correction and local part of coarse basis. 35 Gives back the dense local part of the coarse matrix in column major ordering 36 */ 37 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 38 39 /* Compute total number of coarse nodes and setup coarse solver */ 40 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 41 42 /* free */ 43 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 #undef __FUNCT__ 48 #define __FUNCT__ "PCBDDCResetCustomization" 49 PetscErrorCode PCBDDCResetCustomization(PC pc) 50 { 51 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 57 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 61 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 62 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 63 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 64 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 65 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCResetTopography" 71 PetscErrorCode PCBDDCResetTopography(PC pc) 72 { 73 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 78 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 79 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 80 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 81 PetscFunctionReturn(0); 82 } 83 84 #undef __FUNCT__ 85 #define __FUNCT__ "PCBDDCResetSolvers" 86 PetscErrorCode PCBDDCResetSolvers(PC pc) 87 { 88 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 93 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 100 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 101 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 102 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 104 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 109 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 110 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 111 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 112 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 113 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 114 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 115 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 121 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 122 { 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PC_IS *pcis = (PC_IS*)pc->data; 125 VecType impVecType; 126 PetscInt n_constraints,n_R,old_size; 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 if (!pcbddc->ConstraintMatrix) { 131 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 132 } 133 /* get sizes */ 134 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 135 n_R = pcis->n-pcbddc->n_actual_vertices; 136 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 137 /* local work vectors (try to avoid unneeded work)*/ 138 /* R nodes */ 139 old_size = -1; 140 if (pcbddc->vec1_R) { 141 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 142 } 143 if (n_R != old_size) { 144 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 146 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 147 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 148 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 149 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 150 } 151 /* local primal dofs */ 152 old_size = -1; 153 if (pcbddc->vec1_P) { 154 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 155 } 156 if (pcbddc->local_primal_size != old_size) { 157 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 158 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 159 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 160 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 161 } 162 /* local explicit constraints */ 163 old_size = -1; 164 if (pcbddc->vec1_C) { 165 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 166 } 167 if (n_constraints && n_constraints != old_size) { 168 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 169 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 170 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 171 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 #undef __FUNCT__ 177 #define __FUNCT__ "PCBDDCSetUpCorrection" 178 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 179 { 180 PetscErrorCode ierr; 181 /* pointers to pcis and pcbddc */ 182 PC_IS* pcis = (PC_IS*)pc->data; 183 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 184 /* submatrices of local problem */ 185 Mat A_RV,A_VR,A_VV; 186 /* working matrices */ 187 Mat M1,M2,M3,C_CR; 188 /* working vectors */ 189 Vec vec1_C,vec2_C,vec1_V,vec2_V; 190 /* additional working stuff */ 191 IS is_aux; 192 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 193 const PetscScalar *array,*row_cmat_values; 194 const PetscInt *row_cmat_indices,*idx_R_local; 195 PetscInt *idx_V_B,*auxindices; 196 PetscInt n_vertices,n_constraints,size_of_constraint; 197 PetscInt i,j,n_R,n_D,n_B; 198 PetscBool unsymmetric_check; 199 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 200 MatType impMatType; 201 /* some shortcuts to scalars */ 202 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 203 /* for debugging purposes */ 204 PetscReal *coarsefunctions_errors,*constraints_errors; 205 206 PetscFunctionBegin; 207 /* get number of vertices (corners plus constraints with change of basis) 208 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 209 n_vertices = pcbddc->n_actual_vertices; 210 n_constraints = pcbddc->local_primal_size-n_vertices; 211 /* Set Non-overlapping dimensions */ 212 n_B = pcis->n_B; n_D = pcis->n - n_B; 213 n_R = pcis->n-n_vertices; 214 215 /* Set types for local objects needed by BDDC precondtioner */ 216 impMatType = MATSEQDENSE; 217 218 /* Allocating some extra storage just to be safe */ 219 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 220 for (i=0;i<pcis->n;i++) auxindices[i]=i; 221 222 /* vertices in boundary numbering */ 223 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 224 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 225 if (i != n_vertices) { 226 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 227 } 228 229 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 230 if (n_constraints) { 231 /* see if we can save some allocations */ 232 if (pcbddc->local_auxmat2) { 233 PetscInt on_R,on_constraints; 234 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 235 if (on_R != n_R || on_constraints != n_constraints) { 236 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 237 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 238 } 239 } 240 /* work vectors */ 241 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 242 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 243 /* auxiliary matrices */ 244 if (!pcbddc->local_auxmat2) { 245 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 246 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 247 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 249 } 250 251 /* Extract constraints on R nodes: C_{CR} */ 252 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 253 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 254 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 255 256 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 257 for (i=0;i<n_constraints;i++) { 258 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 259 /* Get row of constraint matrix in R numbering */ 260 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 261 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 262 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 263 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 264 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 265 /* Solve for row of constraint matrix in R numbering */ 266 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 267 /* Set values in local_auxmat2 */ 268 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 269 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 270 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 271 } 272 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 273 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 274 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 275 276 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 277 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 278 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 279 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 280 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 281 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 282 ierr = MatSetUp(M1);CHKERRQ(ierr); 283 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 284 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 285 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 286 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 287 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 288 ierr = MatDestroy(&M2);CHKERRQ(ierr); 289 ierr = MatDestroy(&M3);CHKERRQ(ierr); 290 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 291 if (!pcbddc->local_auxmat1) { 292 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 293 } else { 294 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 295 } 296 } 297 298 /* Get submatrices from subdomain matrix */ 299 if (n_vertices) { 300 PetscInt ibs,mbs; 301 PetscBool issbaij; 302 Mat newmat; 303 304 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 305 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 306 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 307 if (ibs != mbs) { /* need to convert to SEQAIJ */ 308 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 310 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 311 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 313 } else { 314 /* this is safe */ 315 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 316 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 317 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 318 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 319 /* which of the two approaches is faster? */ 320 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 321 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 322 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 324 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 325 } else { 326 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 327 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 328 } 329 } 330 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 331 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 332 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 333 } 334 335 /* Matrix of coarse basis functions (local) */ 336 if (pcbddc->coarse_phi_B) { 337 PetscInt on_B,on_primal; 338 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 339 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 340 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 341 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 342 } 343 } 344 if (pcbddc->coarse_phi_D) { 345 PetscInt on_D,on_primal; 346 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 347 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 348 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 349 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 350 } 351 } 352 if (!pcbddc->coarse_phi_B) { 353 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 355 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 356 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 357 } 358 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 359 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 361 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 362 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 363 } 364 365 if (pcbddc->dbg_flag) { 366 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 367 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 368 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 369 } 370 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 371 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 372 373 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 374 375 /* vertices */ 376 for (i=0;i<n_vertices;i++) { 377 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 378 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 379 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 380 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 381 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 382 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 383 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 384 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 387 if (n_constraints) { 388 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 389 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 390 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 391 } 392 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 393 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 394 395 /* Set values in coarse basis function and subdomain part of coarse_mat */ 396 /* coarse basis functions */ 397 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 398 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 399 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 400 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 402 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 403 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 404 if (pcbddc->switch_static || pcbddc->dbg_flag) { 405 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 406 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 410 } 411 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 412 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 415 if (n_constraints) { 416 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 418 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 419 } 420 421 /* check */ 422 if (pcbddc->dbg_flag) { 423 /* assemble subdomain vector on local nodes */ 424 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 425 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 426 if (n_R) { 427 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 428 } 429 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 430 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 432 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 433 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 434 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 435 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 436 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 437 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 438 if (n_constraints) { 439 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 440 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 441 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 442 } 443 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 444 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 445 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 446 /* check saddle point solution */ 447 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 448 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 449 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 450 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 451 /* shift by the identity matrix */ 452 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 453 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 455 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 456 } 457 } 458 459 /* constraints */ 460 for (i=0;i<n_constraints;i++) { 461 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 462 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 463 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 464 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 465 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 466 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 467 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 468 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 469 if (n_vertices) { 470 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 471 } 472 /* Set values in coarse basis function and subdomain part of coarse_mat */ 473 /* coarse basis functions */ 474 j = i+n_vertices; /* don't touch this! */ 475 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 476 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 479 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 480 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 481 if (pcbddc->switch_static || pcbddc->dbg_flag) { 482 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 483 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 484 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 485 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 487 } 488 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 489 if (n_vertices) { 490 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 491 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 492 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 493 } 494 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 495 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 496 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 497 498 if (pcbddc->dbg_flag) { 499 /* assemble subdomain vector on nodes */ 500 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 501 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 502 if (n_R) { 503 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 504 } 505 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 506 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 507 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 508 /* assemble subdomain vector of lagrange multipliers */ 509 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 510 if (n_vertices) { 511 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 512 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 514 } 515 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 516 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 517 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 518 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 519 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 520 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 521 /* check saddle point solution */ 522 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 523 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 524 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 525 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 526 /* shift by the identity matrix */ 527 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 528 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 529 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 530 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 531 } 532 } 533 /* call assembling routines for local coarse basis */ 534 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 535 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 536 if (pcbddc->switch_static || pcbddc->dbg_flag) { 537 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 538 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 539 } 540 541 /* compute other basis functions for non-symmetric problems */ 542 /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/ 543 { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */ 544 PetscBool setsym; 545 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr); 546 if (!setsym) pcbddc->issym = PETSC_FALSE; 547 } 548 549 if (!pcbddc->issym) { 550 if (!pcbddc->coarse_psi_B) { 551 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 552 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 553 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 554 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 555 } 556 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 557 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 558 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 559 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 560 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 561 } 562 for (i=0;i<pcbddc->local_primal_size;i++) { 563 if (n_constraints) { 564 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 565 for (j=0;j<n_constraints;j++) { 566 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 567 } 568 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 569 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 570 } 571 if (i<n_vertices) { 572 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 573 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 574 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 575 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 576 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 577 if (n_constraints) { 578 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 579 } 580 } else { 581 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 582 } 583 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 584 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 585 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 586 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 587 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 588 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 589 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 590 if (i<n_vertices) { 591 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 592 } 593 if (pcbddc->switch_static || pcbddc->dbg_flag) { 594 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 595 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 596 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 597 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 598 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 599 } 600 601 if (pcbddc->dbg_flag) { 602 /* assemble subdomain vector on nodes */ 603 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 604 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 605 if (n_R) { 606 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 607 } 608 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 609 if (i<n_vertices) { 610 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 611 } 612 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 614 /* assemble subdomain vector of lagrange multipliers */ 615 for (j=0;j<pcbddc->local_primal_size;j++) { 616 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 617 } 618 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 619 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 620 /* check saddle point solution */ 621 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 622 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 623 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 624 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 625 /* shift by the identity matrix */ 626 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 627 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 628 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 629 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 630 } 631 } 632 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 633 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 634 if (pcbddc->switch_static || pcbddc->dbg_flag) { 635 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 636 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 637 } 638 unsymmetric_check = PETSC_TRUE; 639 } else { /* take references to already computed coarse basis */ 640 unsymmetric_check = PETSC_FALSE; 641 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 642 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 643 if (pcbddc->coarse_phi_D) { 644 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 645 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 646 } 647 } 648 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 649 /* Checking coarse_sub_mat and coarse basis functios */ 650 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 651 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 652 if (pcbddc->dbg_flag) { 653 Mat coarse_sub_mat; 654 Mat AUXMAT,TM1,TM2,TM3,TM4; 655 Mat coarse_phi_D,coarse_phi_B; 656 Mat coarse_psi_D,coarse_psi_B; 657 Mat A_II,A_BB,A_IB,A_BI; 658 MatType checkmattype=MATSEQAIJ; 659 PetscReal real_value; 660 661 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 662 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 663 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 664 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 665 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 666 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 667 if (unsymmetric_check) { 668 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 669 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 670 } 671 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 672 673 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 674 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 676 if (unsymmetric_check) { 677 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 684 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 685 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 686 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 687 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 688 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 689 } else { 690 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 691 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 692 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 693 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 694 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 695 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 696 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 697 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 698 } 699 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 700 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 701 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 702 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 703 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 704 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 705 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 710 for (i=0;i<pcbddc->local_primal_size;i++) { 711 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 712 } 713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 714 for (i=0;i<pcbddc->local_primal_size;i++) { 715 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 716 } 717 if (unsymmetric_check) { 718 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 719 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 720 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 721 } 722 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 723 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 724 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 725 } 726 } 727 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 728 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 729 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 730 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 731 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 732 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 733 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 734 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 735 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 736 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 737 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 738 if (unsymmetric_check) { 739 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 740 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 741 } 742 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 743 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 744 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 745 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 746 } 747 /* free memory */ 748 if (n_vertices) { 749 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 750 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 751 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 752 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 753 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 754 } 755 if (n_constraints) { 756 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 757 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 758 ierr = MatDestroy(&M1);CHKERRQ(ierr); 759 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 760 } 761 ierr = PetscFree(auxindices);CHKERRQ(ierr); 762 /* get back data */ 763 *coarse_submat_vals_n = coarse_submat_vals; 764 PetscFunctionReturn(0); 765 } 766 767 #undef __FUNCT__ 768 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 769 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 770 { 771 PC_IS* pcis = (PC_IS*)(pc->data); 772 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 773 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 774 PetscBool issbaij,isseqaij; 775 /* manage repeated solves */ 776 MatReuse reuse; 777 PetscErrorCode ierr; 778 779 PetscFunctionBegin; 780 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 781 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 782 } 783 /* get mat flags */ 784 reuse = MAT_INITIAL_MATRIX; 785 if (pc->setupcalled) { 786 if (pc->flag == SAME_NONZERO_PATTERN) { 787 reuse = MAT_REUSE_MATRIX; 788 } else { 789 reuse = MAT_INITIAL_MATRIX; 790 } 791 } 792 if (reuse == MAT_INITIAL_MATRIX) { 793 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 794 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 795 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 796 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 797 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 798 } 799 800 /* transform local matrices if needed */ 801 if (pcbddc->ChangeOfBasisMatrix) { 802 Mat *change_mat_all; 803 IS is_local,is_global; 804 PetscBool sorted; 805 PetscInt *idxs_perm; 806 807 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr); 808 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 809 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 810 ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr); 811 if (!sorted) { 812 const PetscInt *idxs; 813 PetscInt *idxs_sorted,i; 814 815 ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr); 816 ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr); 817 for (i=0;i<pcis->n;i++) { 818 idxs_perm[i] = i; 819 } 820 ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr); 821 ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr); 822 for (i=0;i<pcis->n;i++) { 823 idxs_sorted[i] = idxs[idxs_perm[i]]; 824 } 825 ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr); 826 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 827 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr); 828 } 829 830 /* get change of basis on the whole set of local dofs */ 831 ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr); 832 833 if (!sorted) { 834 Mat new_mat; 835 IS is_perm; 836 PetscInt *idxs,i; 837 838 ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr); 839 for (i=0;i<pcis->n;i++) { 840 idxs[idxs_perm[i]] = i; 841 } 842 ierr = PetscFree(idxs_perm);CHKERRQ(ierr); 843 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr); 844 ierr = ISSetPermutation(is_perm);CHKERRQ(ierr); 845 ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr); 846 ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr); 847 change_mat_all[0] = new_mat; 848 ierr = ISDestroy(&is_perm);CHKERRQ(ierr); 849 } 850 851 /* check */ 852 if (pcbddc->dbg_flag) { 853 Vec x,x_change; 854 PetscReal error; 855 856 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 857 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 858 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 859 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 860 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 861 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 863 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 864 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 865 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 866 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 867 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 868 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 869 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 870 ierr = VecDestroy(&x);CHKERRQ(ierr); 871 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 872 } 873 874 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 875 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 876 if (isseqaij) { 877 ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 878 } else { 879 Mat work_mat; 880 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 881 ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 882 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 883 } 884 /* 885 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 886 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 887 */ 888 ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr); 889 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 890 } else { 891 /* without change of basis, the local matrix is unchanged */ 892 if (!pcbddc->local_mat) { 893 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 894 pcbddc->local_mat = matis->A; 895 } 896 } 897 898 /* get submatrices */ 899 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 900 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 901 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 902 if (!issbaij) { 903 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 904 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 905 } else { 906 Mat newmat; 907 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 908 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 909 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 910 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 911 } 912 PetscFunctionReturn(0); 913 } 914 915 #undef __FUNCT__ 916 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 917 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 918 { 919 PC_IS* pcis = (PC_IS*)(pc->data); 920 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 921 IS is_aux1,is_aux2; 922 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 923 PetscInt n_vertices,i,j,n_R,n_D,n_B; 924 PetscInt vbs,bs; 925 PetscBT bitmask; 926 PetscErrorCode ierr; 927 928 PetscFunctionBegin; 929 /* 930 No need to setup local scatters if 931 - primal space is unchanged 932 AND 933 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 934 AND 935 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 936 */ 937 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 938 PetscFunctionReturn(0); 939 } 940 /* destroy old objects */ 941 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 942 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 943 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 944 /* Set Non-overlapping dimensions */ 945 n_B = pcis->n_B; n_D = pcis->n - n_B; 946 n_vertices = pcbddc->n_actual_vertices; 947 /* create auxiliary bitmask */ 948 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 949 for (i=0;i<n_vertices;i++) { 950 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 951 } 952 953 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 954 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 955 for (i=0, n_R=0; i<pcis->n; i++) { 956 if (!PetscBTLookup(bitmask,i)) { 957 idx_R_local[n_R] = i; 958 n_R++; 959 } 960 } 961 962 /* Block code */ 963 vbs = 1; 964 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 965 if (bs>1 && !(n_vertices%bs)) { 966 PetscBool is_blocked = PETSC_TRUE; 967 PetscInt *vary; 968 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 969 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 970 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 971 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 972 for (i=0; i<n_vertices; i++) { 973 if (vary[i]!=0 && vary[i]!=bs) { 974 is_blocked = PETSC_FALSE; 975 break; 976 } 977 } 978 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 979 vbs = bs; 980 for (i=0;i<n_R/vbs;i++) { 981 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 982 } 983 } 984 ierr = PetscFree(vary);CHKERRQ(ierr); 985 } 986 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 987 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 988 989 /* print some info if requested */ 990 if (pcbddc->dbg_flag) { 991 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 993 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 995 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 996 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 999 } 1000 1001 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1002 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1003 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 1004 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 1005 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1006 for (i=0; i<n_D; i++) { 1007 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1008 } 1009 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1010 for (i=0, j=0; i<n_R; i++) { 1011 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1012 aux_array1[j++] = i; 1013 } 1014 } 1015 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1016 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1017 for (i=0, j=0; i<n_B; i++) { 1018 if (!PetscBTLookup(bitmask,is_indices[i])) { 1019 aux_array2[j++] = i; 1020 } 1021 } 1022 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1023 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1024 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1025 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1026 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1027 1028 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1029 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1030 for (i=0, j=0; i<n_R; i++) { 1031 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1032 aux_array1[j++] = i; 1033 } 1034 } 1035 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1036 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1037 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1038 } 1039 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1040 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1047 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1048 { 1049 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1050 PC_IS *pcis = (PC_IS*)pc->data; 1051 PC pc_temp; 1052 Mat A_RR; 1053 MatReuse reuse; 1054 PetscScalar m_one = -1.0; 1055 PetscReal value; 1056 PetscInt n_D,n_R,ibs,mbs; 1057 PetscBool use_exact,use_exact_reduced,issbaij; 1058 PetscErrorCode ierr; 1059 /* prefixes stuff */ 1060 char dir_prefix[256],neu_prefix[256],str_level[16]; 1061 size_t len; 1062 1063 PetscFunctionBegin; 1064 1065 /* compute prefixes */ 1066 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1067 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1068 if (!pcbddc->current_level) { 1069 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1070 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1071 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1072 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1073 } else { 1074 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1075 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1076 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1077 len -= 15; /* remove "pc_bddc_coarse_" */ 1078 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1079 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1080 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1081 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1082 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1083 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1084 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1085 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1086 } 1087 1088 /* DIRICHLET PROBLEM */ 1089 /* Matrix for Dirichlet problem is pcis->A_II */ 1090 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1091 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1092 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1093 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1094 /* default */ 1095 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1096 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1097 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1098 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1099 if (issbaij) { 1100 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1101 } else { 1102 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1103 } 1104 /* Allow user's customization */ 1105 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1106 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1107 } 1108 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1109 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1110 if (!n_D) { 1111 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1112 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1113 } 1114 /* Set Up KSP for Dirichlet problem of BDDC */ 1115 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1116 /* set ksp_D into pcis data */ 1117 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1118 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1119 pcis->ksp_D = pcbddc->ksp_D; 1120 1121 /* NEUMANN PROBLEM */ 1122 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1123 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1124 if (pcbddc->ksp_R) { /* already created ksp */ 1125 PetscInt nn_R; 1126 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1127 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1128 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1129 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1130 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1131 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1132 reuse = MAT_INITIAL_MATRIX; 1133 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1134 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1135 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1136 reuse = MAT_INITIAL_MATRIX; 1137 } else { /* safe to reuse the matrix */ 1138 reuse = MAT_REUSE_MATRIX; 1139 } 1140 } 1141 /* last check */ 1142 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1143 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1144 reuse = MAT_INITIAL_MATRIX; 1145 } 1146 } else { /* first time, so we need to create the matrix */ 1147 reuse = MAT_INITIAL_MATRIX; 1148 } 1149 /* extract A_RR */ 1150 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1151 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1152 if (ibs != mbs) { 1153 Mat newmat; 1154 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1155 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1156 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1157 } else { 1158 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1159 } 1160 if (!pcbddc->ksp_R) { /* create object if not present */ 1161 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1162 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1163 /* default */ 1164 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1165 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1166 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1167 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1168 if (issbaij) { 1169 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1170 } else { 1171 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1172 } 1173 /* Allow user's customization */ 1174 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1175 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1176 } 1177 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1178 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1179 if (!n_R) { 1180 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1181 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1182 } 1183 /* Set Up KSP for Neumann problem of BDDC */ 1184 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1185 1186 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1187 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1188 /* Dirichlet */ 1189 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1190 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1191 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1192 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1193 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1194 /* need to be adapted? */ 1195 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1196 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1197 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1198 /* print info */ 1199 if (pcbddc->dbg_flag) { 1200 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1201 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1202 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1203 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1204 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1205 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1206 } 1207 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1208 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1209 } 1210 1211 /* Neumann */ 1212 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1213 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1214 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1215 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1216 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1217 /* need to be adapted? */ 1218 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1219 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1220 /* print info */ 1221 if (pcbddc->dbg_flag) { 1222 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1223 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1224 } 1225 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1226 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1227 } 1228 } 1229 /* free Neumann problem's matrix */ 1230 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1236 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1237 { 1238 PetscErrorCode ierr; 1239 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1240 1241 PetscFunctionBegin; 1242 if (applytranspose) { 1243 if (pcbddc->local_auxmat1) { 1244 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1245 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1246 } 1247 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1248 } else { 1249 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1250 if (pcbddc->local_auxmat1) { 1251 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1252 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1253 } 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1261 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1262 { 1263 PetscErrorCode ierr; 1264 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1265 PC_IS* pcis = (PC_IS*) (pc->data); 1266 const PetscScalar zero = 0.0; 1267 1268 PetscFunctionBegin; 1269 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1270 if (applytranspose) { 1271 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1272 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1273 } else { 1274 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1275 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1276 } 1277 /* start communications from local primal nodes to rhs of coarse solver */ 1278 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1279 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1280 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1281 1282 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1283 /* TODO remove null space when doing multilevel */ 1284 if (pcbddc->coarse_ksp) { 1285 if (applytranspose) { 1286 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1287 } else { 1288 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1289 } 1290 } 1291 1292 /* Local solution on R nodes */ 1293 if (pcis->n) { 1294 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1295 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1296 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1297 if (pcbddc->switch_static) { 1298 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1299 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1300 } 1301 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1302 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1303 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1304 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1305 if (pcbddc->switch_static) { 1306 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1307 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1308 } 1309 } 1310 1311 /* communications from coarse sol to local primal nodes */ 1312 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1313 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1314 1315 /* Sum contributions from two levels */ 1316 if (applytranspose) { 1317 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1318 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1319 } else { 1320 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1321 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1329 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1330 { 1331 PetscErrorCode ierr; 1332 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1333 PetscScalar *array,*array2; 1334 Vec from,to; 1335 1336 PetscFunctionBegin; 1337 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1338 from = pcbddc->coarse_vec; 1339 to = pcbddc->vec1_P; 1340 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1341 Vec tvec; 1342 PetscInt lsize; 1343 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1344 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1345 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1346 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1347 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1348 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1349 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1350 } 1351 } else { /* from local to global -> put data in coarse right hand side */ 1352 from = pcbddc->vec1_P; 1353 to = pcbddc->coarse_vec; 1354 } 1355 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1361 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1362 { 1363 PetscErrorCode ierr; 1364 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1365 PetscScalar *array,*array2; 1366 Vec from,to; 1367 1368 PetscFunctionBegin; 1369 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1370 from = pcbddc->coarse_vec; 1371 to = pcbddc->vec1_P; 1372 } else { /* from local to global -> put data in coarse right hand side */ 1373 from = pcbddc->vec1_P; 1374 to = pcbddc->coarse_vec; 1375 } 1376 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1377 if (smode == SCATTER_FORWARD) { 1378 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1379 Vec tvec; 1380 PetscInt lsize; 1381 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1382 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1383 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1384 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1385 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1386 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1387 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1388 } 1389 } 1390 PetscFunctionReturn(0); 1391 } 1392 1393 /* uncomment for testing purposes */ 1394 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1397 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1398 { 1399 PetscErrorCode ierr; 1400 PC_IS* pcis = (PC_IS*)(pc->data); 1401 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1402 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1403 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1404 MatType impMatType=MATSEQAIJ; 1405 /* one and zero */ 1406 PetscScalar one=1.0,zero=0.0; 1407 /* space to store constraints and their local indices */ 1408 PetscScalar *temp_quadrature_constraint; 1409 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1410 /* iterators */ 1411 PetscInt i,j,k,total_counts,temp_start_ptr; 1412 /* stuff to store connected components stored in pcbddc->mat_graph */ 1413 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1414 PetscInt n_ISForFaces,n_ISForEdges; 1415 /* near null space stuff */ 1416 MatNullSpace nearnullsp; 1417 const Vec *nearnullvecs; 1418 Vec *localnearnullsp; 1419 PetscBool nnsp_has_cnst; 1420 PetscInt nnsp_size; 1421 PetscScalar *array; 1422 /* BLAS integers */ 1423 PetscBLASInt lwork,lierr; 1424 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1425 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1426 /* LAPACK working arrays for SVD or POD */ 1427 PetscBool skip_lapack; 1428 PetscScalar *work; 1429 PetscReal *singular_vals; 1430 #if defined(PETSC_USE_COMPLEX) 1431 PetscReal *rwork; 1432 #endif 1433 #if defined(PETSC_MISSING_LAPACK_GESVD) 1434 PetscBLASInt Blas_one_2=1; 1435 PetscScalar *temp_basis,*correlation_mat; 1436 #else 1437 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1438 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1439 #endif 1440 /* reuse */ 1441 PetscInt olocal_primal_size; 1442 PetscInt *oprimal_indices_local_idxs; 1443 /* change of basis */ 1444 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1445 PetscBool boolforchange,qr_needed; 1446 PetscBT touched,change_basis,qr_needed_idx; 1447 /* auxiliary stuff */ 1448 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1449 /* some quantities */ 1450 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1451 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1452 1453 1454 PetscFunctionBegin; 1455 /* Destroy Mat objects computed previously */ 1456 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1457 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1458 /* Get index sets for faces, edges and vertices from graph */ 1459 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1460 /* free unneeded index sets */ 1461 if (!pcbddc->use_vertices) { 1462 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1463 } 1464 if (!pcbddc->use_edges) { 1465 for (i=0;i<n_ISForEdges;i++) { 1466 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1467 } 1468 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1469 n_ISForEdges = 0; 1470 } 1471 if (!pcbddc->use_faces) { 1472 for (i=0;i<n_ISForFaces;i++) { 1473 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1474 } 1475 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1476 n_ISForFaces = 0; 1477 } 1478 /* HACKS (the following two blocks of code) */ 1479 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1480 pcbddc->use_change_of_basis = PETSC_TRUE; 1481 if (!ISForEdges) { 1482 pcbddc->use_change_on_faces = PETSC_TRUE; 1483 } 1484 } 1485 if (pcbddc->NullSpace) { 1486 /* use_change_of_basis should be consistent among processors */ 1487 PetscBool tbool[2],gbool[2]; 1488 tbool [0] = pcbddc->use_change_of_basis; 1489 tbool [1] = pcbddc->use_change_on_faces; 1490 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1491 pcbddc->use_change_of_basis = gbool[0]; 1492 pcbddc->use_change_on_faces = gbool[1]; 1493 } 1494 /* print some info */ 1495 if (pcbddc->dbg_flag) { 1496 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1497 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1498 i = 0; 1499 if (ISForVertices) { 1500 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1501 } 1502 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1503 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1504 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1505 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1506 } 1507 /* check if near null space is attached to global mat */ 1508 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1509 if (nearnullsp) { 1510 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1511 /* remove any stored info */ 1512 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1513 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1514 /* store information for BDDC solver reuse */ 1515 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1516 pcbddc->onearnullspace = nearnullsp; 1517 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1518 for (i=0;i<nnsp_size;i++) { 1519 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1520 } 1521 } else { /* if near null space is not provided BDDC uses constants by default */ 1522 nnsp_size = 0; 1523 nnsp_has_cnst = PETSC_TRUE; 1524 } 1525 /* get max number of constraints on a single cc */ 1526 max_constraints = nnsp_size; 1527 if (nnsp_has_cnst) max_constraints++; 1528 1529 /* 1530 Evaluate maximum storage size needed by the procedure 1531 - temp_indices will contain start index of each constraint stored as follows 1532 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1533 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1534 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1535 */ 1536 total_counts = n_ISForFaces+n_ISForEdges; 1537 total_counts *= max_constraints; 1538 n_vertices = 0; 1539 if (ISForVertices) { 1540 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1541 } 1542 total_counts += n_vertices; 1543 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1544 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1545 total_counts = 0; 1546 max_size_of_constraint = 0; 1547 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1548 if (i<n_ISForEdges) { 1549 used_IS = &ISForEdges[i]; 1550 } else { 1551 used_IS = &ISForFaces[i-n_ISForEdges]; 1552 } 1553 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1554 total_counts += j; 1555 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1556 } 1557 total_counts *= max_constraints; 1558 total_counts += n_vertices; 1559 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1560 /* get local part of global near null space vectors */ 1561 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1562 for (k=0;k<nnsp_size;k++) { 1563 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1564 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1565 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 } 1567 1568 /* whether or not to skip lapack calls */ 1569 skip_lapack = PETSC_TRUE; 1570 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1571 1572 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1573 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1574 PetscScalar temp_work; 1575 #if defined(PETSC_MISSING_LAPACK_GESVD) 1576 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1577 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1578 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1579 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1580 #if defined(PETSC_USE_COMPLEX) 1581 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1582 #endif 1583 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1584 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1585 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1586 lwork = -1; 1587 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1588 #if !defined(PETSC_USE_COMPLEX) 1589 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1590 #else 1591 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1592 #endif 1593 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1594 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1595 #else /* on missing GESVD */ 1596 /* SVD */ 1597 PetscInt max_n,min_n; 1598 max_n = max_size_of_constraint; 1599 min_n = max_constraints; 1600 if (max_size_of_constraint < max_constraints) { 1601 min_n = max_size_of_constraint; 1602 max_n = max_constraints; 1603 } 1604 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1605 #if defined(PETSC_USE_COMPLEX) 1606 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1607 #endif 1608 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1609 lwork = -1; 1610 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1611 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1612 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1613 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1614 #if !defined(PETSC_USE_COMPLEX) 1615 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1616 #else 1617 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1618 #endif 1619 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1620 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1621 #endif /* on missing GESVD */ 1622 /* Allocate optimal workspace */ 1623 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1624 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1625 } 1626 /* Now we can loop on constraining sets */ 1627 total_counts = 0; 1628 temp_indices[0] = 0; 1629 /* vertices */ 1630 if (ISForVertices) { 1631 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1632 if (nnsp_has_cnst) { /* consider all vertices */ 1633 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1634 for (i=0;i<n_vertices;i++) { 1635 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1636 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1637 total_counts++; 1638 } 1639 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1640 PetscBool used_vertex; 1641 for (i=0;i<n_vertices;i++) { 1642 used_vertex = PETSC_FALSE; 1643 k = 0; 1644 while (!used_vertex && k<nnsp_size) { 1645 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1646 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1647 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1648 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1649 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1650 total_counts++; 1651 used_vertex = PETSC_TRUE; 1652 } 1653 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1654 k++; 1655 } 1656 } 1657 } 1658 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1659 n_vertices = total_counts; 1660 } 1661 1662 /* edges and faces */ 1663 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1664 if (i<n_ISForEdges) { 1665 used_IS = &ISForEdges[i]; 1666 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1667 } else { 1668 used_IS = &ISForFaces[i-n_ISForEdges]; 1669 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1670 } 1671 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1672 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1673 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1674 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1675 /* change of basis should not be performed on local periodic nodes */ 1676 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1677 if (nnsp_has_cnst) { 1678 PetscScalar quad_value; 1679 temp_constraints++; 1680 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1681 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1682 for (j=0;j<size_of_constraint;j++) { 1683 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1684 } 1685 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1686 total_counts++; 1687 } 1688 for (k=0;k<nnsp_size;k++) { 1689 PetscReal real_value; 1690 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1691 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1692 for (j=0;j<size_of_constraint;j++) { 1693 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1694 } 1695 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1696 /* check if array is null on the connected component */ 1697 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1698 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1699 if (real_value > 0.0) { /* keep indices and values */ 1700 temp_constraints++; 1701 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1702 total_counts++; 1703 } 1704 } 1705 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1706 valid_constraints = temp_constraints; 1707 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1708 if (!pcbddc->use_nnsp_true && temp_constraints) { 1709 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1710 1711 #if defined(PETSC_MISSING_LAPACK_GESVD) 1712 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1713 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1714 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1715 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1716 from that computed using LAPACKgesvd 1717 -> This is due to a different computation of eigenvectors in LAPACKheev 1718 -> The quality of the POD-computed basis will be the same */ 1719 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1720 /* Store upper triangular part of correlation matrix */ 1721 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1722 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1723 for (j=0;j<temp_constraints;j++) { 1724 for (k=0;k<j+1;k++) { 1725 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1726 } 1727 } 1728 /* compute eigenvalues and eigenvectors of correlation matrix */ 1729 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1730 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1731 #if !defined(PETSC_USE_COMPLEX) 1732 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1733 #else 1734 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1735 #endif 1736 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1737 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1738 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1739 j=0; 1740 while (j < temp_constraints && singular_vals[j] < tol) j++; 1741 total_counts=total_counts-j; 1742 valid_constraints = temp_constraints-j; 1743 /* scale and copy POD basis into used quadrature memory */ 1744 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1745 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1746 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1747 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1748 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1749 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1750 if (j<temp_constraints) { 1751 PetscInt ii; 1752 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1753 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1754 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1755 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1756 for (k=0;k<temp_constraints-j;k++) { 1757 for (ii=0;ii<size_of_constraint;ii++) { 1758 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1759 } 1760 } 1761 } 1762 #else /* on missing GESVD */ 1763 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1764 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1765 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1766 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1767 #if !defined(PETSC_USE_COMPLEX) 1768 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1769 #else 1770 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1771 #endif 1772 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1773 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1774 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1775 k = temp_constraints; 1776 if (k > size_of_constraint) k = size_of_constraint; 1777 j = 0; 1778 while (j < k && singular_vals[k-j-1] < tol) j++; 1779 total_counts = total_counts-temp_constraints+k-j; 1780 valid_constraints = k-j; 1781 #endif /* on missing GESVD */ 1782 } 1783 /* setting change_of_basis flag is safe now */ 1784 if (boolforchange) { 1785 for (j=0;j<valid_constraints;j++) { 1786 PetscBTSet(change_basis,total_counts-j-1); 1787 } 1788 } 1789 } 1790 /* free index sets of faces, edges and vertices */ 1791 for (i=0;i<n_ISForFaces;i++) { 1792 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1793 } 1794 if (n_ISForFaces) { 1795 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1796 } 1797 for (i=0;i<n_ISForEdges;i++) { 1798 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1799 } 1800 if (n_ISForEdges) { 1801 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1802 } 1803 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1804 /* map temp_indices_to_constraint in boundary numbering */ 1805 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1806 if (i != temp_indices[total_counts]) { 1807 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1808 } 1809 1810 /* free workspace */ 1811 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1812 ierr = PetscFree(work);CHKERRQ(ierr); 1813 #if defined(PETSC_USE_COMPLEX) 1814 ierr = PetscFree(rwork);CHKERRQ(ierr); 1815 #endif 1816 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1817 #if defined(PETSC_MISSING_LAPACK_GESVD) 1818 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1819 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1820 #endif 1821 } 1822 for (k=0;k<nnsp_size;k++) { 1823 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1824 } 1825 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1826 1827 /* set quantities in pcbddc data structure and store previous primal size */ 1828 /* n_vertices defines the number of subdomain corners in the primal space */ 1829 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1830 olocal_primal_size = pcbddc->local_primal_size; 1831 pcbddc->local_primal_size = total_counts; 1832 pcbddc->n_vertices = n_vertices; 1833 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1834 1835 /* Create constraint matrix */ 1836 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1837 /* so we need to set it up properly either with or without change of basis */ 1838 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1839 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1840 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1841 /* array to compute a local numbering of constraints : vertices first then constraints */ 1842 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1843 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1844 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1845 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1846 /* auxiliary stuff for basis change */ 1847 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1848 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1849 1850 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1851 total_primal_vertices=0; 1852 for (i=0;i<pcbddc->local_primal_size;i++) { 1853 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1854 if (size_of_constraint == 1) { 1855 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1856 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1857 aux_primal_minloc[total_primal_vertices]=0; 1858 total_primal_vertices++; 1859 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1860 PetscInt min_loc,min_index; 1861 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1862 /* find first untouched local node */ 1863 k = 0; 1864 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1865 min_index = global_indices[k]; 1866 min_loc = k; 1867 /* search the minimum among global nodes already untouched on the cc */ 1868 for (k=1;k<size_of_constraint;k++) { 1869 /* there can be more than one constraint on a single connected component */ 1870 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1871 min_index = global_indices[k]; 1872 min_loc = k; 1873 } 1874 } 1875 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1876 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1877 aux_primal_minloc[total_primal_vertices]=min_loc; 1878 total_primal_vertices++; 1879 } 1880 } 1881 /* determine if a QR strategy is needed for change of basis */ 1882 qr_needed = PETSC_FALSE; 1883 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1884 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1885 if (PetscBTLookup(change_basis,i)) { 1886 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1887 j = 0; 1888 for (k=0;k<size_of_constraint;k++) { 1889 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1890 j++; 1891 } 1892 } 1893 /* found more than one primal dof on the cc */ 1894 if (j > 1) { 1895 PetscBTSet(qr_needed_idx,i); 1896 qr_needed = PETSC_TRUE; 1897 } 1898 } 1899 } 1900 /* free workspace */ 1901 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1902 1903 /* permute indices in order to have a sorted set of vertices */ 1904 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1905 1906 /* nonzero structure of constraint matrix */ 1907 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1908 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1909 j=total_primal_vertices; 1910 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1911 if (!PetscBTLookup(change_basis,i)) { 1912 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1913 j++; 1914 } 1915 } 1916 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1917 ierr = PetscFree(nnz);CHKERRQ(ierr); 1918 /* set values in constraint matrix */ 1919 for (i=0;i<total_primal_vertices;i++) { 1920 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1921 } 1922 total_counts = total_primal_vertices; 1923 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1924 if (!PetscBTLookup(change_basis,i)) { 1925 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1926 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1927 total_counts++; 1928 } 1929 } 1930 /* assembling */ 1931 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1932 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1933 /* 1934 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1935 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1936 */ 1937 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1938 if (pcbddc->use_change_of_basis) { 1939 /* dual and primal dofs on a single cc */ 1940 PetscInt dual_dofs,primal_dofs; 1941 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1942 PetscInt primal_counter; 1943 /* working stuff for GEQRF */ 1944 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1945 PetscBLASInt lqr_work; 1946 /* working stuff for UNGQR */ 1947 PetscScalar *gqr_work,lgqr_work_t; 1948 PetscBLASInt lgqr_work; 1949 /* working stuff for TRTRS */ 1950 PetscScalar *trs_rhs; 1951 PetscBLASInt Blas_NRHS; 1952 /* pointers for values insertion into change of basis matrix */ 1953 PetscInt *start_rows,*start_cols; 1954 PetscScalar *start_vals; 1955 /* working stuff for values insertion */ 1956 PetscBT is_primal; 1957 /* matrix sizes */ 1958 PetscInt global_size,local_size; 1959 /* work array for nonzeros */ 1960 PetscScalar *nnz_array; 1961 /* temporary change of basis */ 1962 Mat localChangeOfBasisMatrix; 1963 /* auxiliary work for global change of basis */ 1964 Vec nnz_vec; 1965 PetscInt *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz; 1966 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 1967 PetscScalar *vals; 1968 PetscBool done; 1969 1970 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 1971 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 1972 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1973 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1974 1975 /* nonzeros for local mat */ 1976 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1977 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1978 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1979 if (PetscBTLookup(change_basis,i)) { 1980 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1981 if (PetscBTLookup(qr_needed_idx,i)) { 1982 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1983 } else { 1984 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1985 /* get local primal index on the cc */ 1986 j = 0; 1987 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1988 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1989 } 1990 } 1991 } 1992 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1993 /* Set initial identity in the matrix */ 1994 for (i=0;i<pcis->n_B;i++) { 1995 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1996 } 1997 1998 if (pcbddc->dbg_flag) { 1999 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2001 } 2002 2003 2004 /* Now we loop on the constraints which need a change of basis */ 2005 /* 2006 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2007 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2008 2009 Basic blocks of change of basis matrix T computed 2010 2011 - Using the following block transformation if there is only a primal dof on the cc 2012 (in the example, primal dof is the last one of the edge in LOCAL ordering 2013 in this code, primal dof is the first one of the edge in GLOBAL ordering) 2014 | 1 0 ... 0 1 | 2015 | 0 1 ... 0 1 | 2016 | ... | 2017 | 0 ... 1 1 | 2018 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 2019 2020 - via QR decomposition of constraints otherwise 2021 */ 2022 if (qr_needed) { 2023 /* space to store Q */ 2024 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 2025 /* first we issue queries for optimal work */ 2026 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2027 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2028 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2029 lqr_work = -1; 2030 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2031 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2032 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2033 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2034 lgqr_work = -1; 2035 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2036 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2037 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2038 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2039 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2040 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2041 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2042 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2043 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2044 /* array to store scaling factors for reflectors */ 2045 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2046 /* array to store rhs and solution of triangular solver */ 2047 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2048 /* allocating workspace for check */ 2049 if (pcbddc->dbg_flag) { 2050 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2051 } 2052 } 2053 /* array to store whether a node is primal or not */ 2054 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2055 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2056 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2057 if (i != total_primal_vertices) { 2058 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2059 } 2060 for (i=0;i<total_primal_vertices;i++) { 2061 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2062 } 2063 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2064 2065 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2066 /* -> using implicit ordering contained in temp_indices data */ 2067 total_counts = pcbddc->n_vertices; 2068 primal_counter = total_counts; 2069 while (total_counts<pcbddc->local_primal_size) { 2070 primal_dofs = 1; 2071 if (PetscBTLookup(change_basis,total_counts)) { 2072 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2073 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2074 primal_dofs++; 2075 } 2076 /* get constraint info */ 2077 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2078 dual_dofs = size_of_constraint-primal_dofs; 2079 2080 if (pcbddc->dbg_flag) { 2081 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2082 } 2083 2084 if (primal_dofs > 1) { /* QR */ 2085 2086 /* copy quadrature constraints for change of basis check */ 2087 if (pcbddc->dbg_flag) { 2088 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2089 } 2090 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2091 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2092 2093 /* compute QR decomposition of constraints */ 2094 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2095 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2096 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2097 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2098 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2099 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2100 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2101 2102 /* explictly compute R^-T */ 2103 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2104 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2105 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2106 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2107 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2108 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2109 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2110 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2111 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2112 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2113 2114 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2115 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2116 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2117 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2118 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2119 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2120 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2121 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2122 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2123 2124 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2125 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2126 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2127 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2128 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2129 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2130 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2131 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2132 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2133 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2134 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2135 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2136 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2137 2138 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2139 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2140 /* insert cols for primal dofs */ 2141 for (j=0;j<primal_dofs;j++) { 2142 start_vals = &qr_basis[j*size_of_constraint]; 2143 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2144 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2145 } 2146 /* insert cols for dual dofs */ 2147 for (j=0,k=0;j<dual_dofs;k++) { 2148 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2149 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2150 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2151 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2152 j++; 2153 } 2154 } 2155 2156 /* check change of basis */ 2157 if (pcbddc->dbg_flag) { 2158 PetscInt ii,jj; 2159 PetscBool valid_qr=PETSC_TRUE; 2160 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2161 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2162 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2163 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2164 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2165 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2166 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2167 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2168 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2169 for (jj=0;jj<size_of_constraint;jj++) { 2170 for (ii=0;ii<primal_dofs;ii++) { 2171 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2172 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2173 } 2174 } 2175 if (!valid_qr) { 2176 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2177 for (jj=0;jj<size_of_constraint;jj++) { 2178 for (ii=0;ii<primal_dofs;ii++) { 2179 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2180 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2181 } 2182 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2183 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2184 } 2185 } 2186 } 2187 } else { 2188 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2189 } 2190 } 2191 } else { /* simple transformation block */ 2192 PetscInt row,col; 2193 PetscScalar val; 2194 for (j=0;j<size_of_constraint;j++) { 2195 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2196 if (!PetscBTLookup(is_primal,row)) { 2197 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2198 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2199 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2200 } else { 2201 for (k=0;k<size_of_constraint;k++) { 2202 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2203 if (row != col) { 2204 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2205 } else { 2206 val = 1.0; 2207 } 2208 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2209 } 2210 } 2211 } 2212 if (pcbddc->dbg_flag) { 2213 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2214 } 2215 } 2216 /* increment primal counter */ 2217 primal_counter += primal_dofs; 2218 } else { 2219 if (pcbddc->dbg_flag) { 2220 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2221 } 2222 } 2223 /* increment constraint counter total_counts */ 2224 total_counts += primal_dofs; 2225 } 2226 2227 /* free workspace */ 2228 if (qr_needed) { 2229 if (pcbddc->dbg_flag) { 2230 ierr = PetscFree(work);CHKERRQ(ierr); 2231 } 2232 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2233 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2234 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2235 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2236 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2237 } 2238 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2239 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2240 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2241 2242 /* assembling of global change of variable */ 2243 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2244 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2245 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2246 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2247 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2248 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2249 2250 /* nonzeros (overestimated) */ 2251 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2252 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2253 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2254 for (i=0;i<pcis->n;i++) { 2255 nnz_array[i] = 1.0; 2256 idxs_all[i] = i; 2257 } 2258 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2259 for (i=0;i<pcis->n_B;i++) { 2260 nnz_array[idxs_B[i]] = nnz[i]; 2261 } 2262 if (pcis->n) { 2263 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2264 } 2265 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2266 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2267 ierr = PetscFree(nnz);CHKERRQ(ierr); 2268 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2269 ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr); 2270 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2271 for (i=0;i<local_size;i++) { 2272 d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size); 2273 o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size); 2274 } 2275 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2276 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2277 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2278 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2279 2280 /* Set identity on dirichlet dofs */ 2281 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2282 for (i=0;i<pcis->n-pcis->n_B;i++) { 2283 PetscScalar one=1.0; 2284 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2285 } 2286 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2287 2288 /* Set values at interface dofs */ 2289 done = PETSC_TRUE; 2290 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2291 if (!done) { 2292 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2293 } 2294 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2295 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2296 ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2297 for (i=0;i<nvtxs;i++) { 2298 PetscInt row,*cols,ncols; 2299 PetscScalar *mat_vals; 2300 2301 row = idxs_B[i]; 2302 ncols = xadj[i+1]-xadj[i]; 2303 cols = idxs_mapped+xadj[i]; 2304 mat_vals = vals+xadj[i]; 2305 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2306 } 2307 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2308 if (!done) { 2309 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2310 } 2311 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2312 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2313 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2314 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2315 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2316 2317 /* check */ 2318 if (pcbddc->dbg_flag) { 2319 PetscReal error; 2320 Vec x,x_change; 2321 2322 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2323 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2324 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2325 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2326 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2327 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2328 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2329 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2330 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2331 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2332 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2333 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2334 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2335 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2336 ierr = VecDestroy(&x);CHKERRQ(ierr); 2337 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2338 } 2339 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2340 2341 /* 2342 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2343 ierr = MatView(local_ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2344 */ 2345 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2346 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2347 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2348 } 2349 2350 /* set up change of basis context */ 2351 if (pcbddc->ChangeOfBasisMatrix) { 2352 PCBDDCChange_ctx change_ctx; 2353 2354 if (!pcbddc->new_global_mat) { 2355 PetscInt global_size,local_size; 2356 2357 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2358 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2359 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2360 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2361 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2362 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2363 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2364 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2365 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2366 } else { 2367 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2368 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2369 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2370 } 2371 if (!pcbddc->user_ChangeOfBasisMatrix) { 2372 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2373 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2374 } else { 2375 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2376 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2377 } 2378 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2379 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2380 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2381 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2382 } 2383 2384 /* get indices in local ordering for vertices and constraints */ 2385 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2386 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2387 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2388 } 2389 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2390 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2391 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2392 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2393 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2394 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2395 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2396 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2397 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2398 /* set quantities in PCBDDC data struct */ 2399 pcbddc->n_actual_vertices = i; 2400 /* check if a new primal space has been introduced */ 2401 pcbddc->new_primal_space_local = PETSC_TRUE; 2402 if (olocal_primal_size == pcbddc->local_primal_size) { 2403 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2404 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2405 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2406 } 2407 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2408 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2409 2410 /* flush dbg viewer */ 2411 if (pcbddc->dbg_flag) { 2412 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2413 } 2414 2415 /* free workspace */ 2416 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2417 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2418 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2419 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2420 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2421 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2422 PetscFunctionReturn(0); 2423 } 2424 2425 #undef __FUNCT__ 2426 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2427 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2428 { 2429 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2430 PC_IS *pcis = (PC_IS*)pc->data; 2431 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2432 PetscInt ierr,i,vertex_size; 2433 PetscViewer viewer=pcbddc->dbg_viewer; 2434 2435 PetscFunctionBegin; 2436 /* Reset previously computed graph */ 2437 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2438 /* Init local Graph struct */ 2439 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2440 2441 /* Check validity of the csr graph passed in by the user */ 2442 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2443 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2444 } 2445 2446 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2447 if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) { 2448 Mat mat_adj; 2449 const PetscInt *xadj,*adjncy; 2450 PetscBool flg_row=PETSC_TRUE; 2451 2452 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2453 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2454 if (!flg_row) { 2455 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2456 } 2457 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2458 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2459 if (!flg_row) { 2460 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2461 } 2462 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2463 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2464 } 2465 2466 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2467 vertex_size = 1; 2468 if (pcbddc->user_provided_isfordofs) { 2469 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2470 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2471 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2472 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2473 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2474 } 2475 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2476 pcbddc->n_ISForDofs = 0; 2477 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2478 } 2479 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2480 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2481 } else { 2482 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2483 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2484 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2485 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2486 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2487 } 2488 } 2489 } 2490 2491 /* Setup of Graph */ 2492 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2493 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2494 } 2495 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2496 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2497 } 2498 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2499 2500 /* Graph's connected components analysis */ 2501 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2502 2503 /* print some info to stdout */ 2504 if (pcbddc->dbg_flag) { 2505 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2506 } 2507 2508 /* mark topography has done */ 2509 pcbddc->recompute_topography = PETSC_FALSE; 2510 PetscFunctionReturn(0); 2511 } 2512 2513 #undef __FUNCT__ 2514 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2515 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2516 { 2517 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2518 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2519 PetscErrorCode ierr; 2520 2521 PetscFunctionBegin; 2522 n = 0; 2523 vertices = 0; 2524 if (pcbddc->ConstraintMatrix) { 2525 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2526 for (i=0;i<local_primal_size;i++) { 2527 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2528 if (size_of_constraint == 1) n++; 2529 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2530 } 2531 if (vertices_idx) { 2532 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2533 n = 0; 2534 for (i=0;i<local_primal_size;i++) { 2535 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2536 if (size_of_constraint == 1) { 2537 vertices[n++]=row_cmat_indices[0]; 2538 } 2539 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2540 } 2541 } 2542 } 2543 *n_vertices = n; 2544 if (vertices_idx) *vertices_idx = vertices; 2545 PetscFunctionReturn(0); 2546 } 2547 2548 #undef __FUNCT__ 2549 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2550 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2551 { 2552 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2553 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2554 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2555 PetscBT touched; 2556 PetscErrorCode ierr; 2557 2558 /* This function assumes that the number of local constraints per connected component 2559 is not greater than the number of nodes defined for the connected component 2560 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2561 PetscFunctionBegin; 2562 n = 0; 2563 constraints_index = 0; 2564 if (pcbddc->ConstraintMatrix) { 2565 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2566 max_size_of_constraint = 0; 2567 for (i=0;i<local_primal_size;i++) { 2568 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2569 if (size_of_constraint > 1) { 2570 n++; 2571 } 2572 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2573 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2574 } 2575 if (constraints_idx) { 2576 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2577 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2578 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2579 n = 0; 2580 for (i=0;i<local_primal_size;i++) { 2581 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2582 if (size_of_constraint > 1) { 2583 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2584 /* find first untouched local node */ 2585 j = 0; 2586 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2587 min_index = row_cmat_global_indices[j]; 2588 min_loc = j; 2589 /* search the minimum among nodes not yet touched on the connected component 2590 since there can be more than one constraint on a single cc */ 2591 for (j=1;j<size_of_constraint;j++) { 2592 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2593 min_index = row_cmat_global_indices[j]; 2594 min_loc = j; 2595 } 2596 } 2597 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2598 constraints_index[n++] = row_cmat_indices[min_loc]; 2599 } 2600 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2601 } 2602 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2603 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2604 } 2605 } 2606 *n_constraints = n; 2607 if (constraints_idx) *constraints_idx = constraints_index; 2608 PetscFunctionReturn(0); 2609 } 2610 2611 #undef __FUNCT__ 2612 #define __FUNCT__ "PCBDDCSubsetNumbering" 2613 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2614 { 2615 Vec local_vec,global_vec; 2616 IS seqis,paris; 2617 VecScatter scatter_ctx; 2618 PetscScalar *array; 2619 PetscInt *temp_global_dofs; 2620 PetscScalar globalsum; 2621 PetscInt i,j,s; 2622 PetscInt nlocals,first_index,old_index,max_local; 2623 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2624 PetscMPIInt *dof_sizes,*dof_displs; 2625 PetscBool first_found; 2626 PetscErrorCode ierr; 2627 2628 PetscFunctionBegin; 2629 /* mpi buffers */ 2630 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2631 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2632 j = ( !rank_prec_comm ? size_prec_comm : 0); 2633 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2634 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2635 /* get maximum size of subset */ 2636 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2637 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2638 max_local = 0; 2639 for (i=0;i<n_local_dofs;i++) { 2640 if (max_local < temp_global_dofs[i] ) { 2641 max_local = temp_global_dofs[i]; 2642 } 2643 } 2644 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2645 max_global++; 2646 max_local = 0; 2647 for (i=0;i<n_local_dofs;i++) { 2648 if (max_local < local_dofs[i] ) { 2649 max_local = local_dofs[i]; 2650 } 2651 } 2652 max_local++; 2653 /* allocate workspace */ 2654 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2655 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2656 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2657 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2658 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2659 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2660 /* create scatter */ 2661 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2662 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2663 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2664 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2665 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2666 /* init array */ 2667 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2668 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2669 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2670 if (local_dofs_mult) { 2671 for (i=0;i<n_local_dofs;i++) { 2672 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2673 } 2674 } else { 2675 for (i=0;i<n_local_dofs;i++) { 2676 array[local_dofs[i]]=1.0; 2677 } 2678 } 2679 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2680 /* scatter into global vec and get total number of global dofs */ 2681 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2682 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2683 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2684 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2685 /* Fill global_vec with cumulative function for global numbering */ 2686 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2687 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2688 nlocals = 0; 2689 first_index = -1; 2690 first_found = PETSC_FALSE; 2691 for (i=0;i<s;i++) { 2692 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2693 first_found = PETSC_TRUE; 2694 first_index = i; 2695 } 2696 nlocals += (PetscInt)PetscRealPart(array[i]); 2697 } 2698 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2699 if (!rank_prec_comm) { 2700 dof_displs[0]=0; 2701 for (i=1;i<size_prec_comm;i++) { 2702 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2703 } 2704 } 2705 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2706 if (first_found) { 2707 array[first_index] += (PetscScalar)nlocals; 2708 old_index = first_index; 2709 for (i=first_index+1;i<s;i++) { 2710 if (PetscRealPart(array[i]) > 0.1) { 2711 array[i] += array[old_index]; 2712 old_index = i; 2713 } 2714 } 2715 } 2716 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2717 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2718 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2719 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2720 /* get global ordering of local dofs */ 2721 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2722 if (local_dofs_mult) { 2723 for (i=0;i<n_local_dofs;i++) { 2724 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2725 } 2726 } else { 2727 for (i=0;i<n_local_dofs;i++) { 2728 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2729 } 2730 } 2731 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2732 /* free workspace */ 2733 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2734 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2735 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2736 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2737 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2738 /* return pointer to global ordering of local dofs */ 2739 *global_numbering_subset = temp_global_dofs; 2740 PetscFunctionReturn(0); 2741 } 2742 2743 #undef __FUNCT__ 2744 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2745 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2746 { 2747 PetscInt i,j; 2748 PetscScalar *alphas; 2749 PetscErrorCode ierr; 2750 2751 PetscFunctionBegin; 2752 /* this implements stabilized Gram-Schmidt */ 2753 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2754 for (i=0;i<n;i++) { 2755 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2756 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2757 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2758 } 2759 ierr = PetscFree(alphas);CHKERRQ(ierr); 2760 PetscFunctionReturn(0); 2761 } 2762 2763 #undef __FUNCT__ 2764 #define __FUNCT__ "MatISGetSubassemblingPattern" 2765 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2766 { 2767 Mat subdomain_adj; 2768 IS new_ranks,ranks_send_to; 2769 MatPartitioning partitioner; 2770 Mat_IS *matis; 2771 PetscInt n_neighs,*neighs,*n_shared,**shared; 2772 PetscInt prank; 2773 PetscMPIInt size,rank,color; 2774 PetscInt *xadj,*adjncy,*oldranks; 2775 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2776 PetscInt i,local_size,threshold=0; 2777 PetscErrorCode ierr; 2778 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2779 PetscSubcomm subcomm; 2780 2781 PetscFunctionBegin; 2782 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2783 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2784 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2785 2786 /* Get info on mapping */ 2787 matis = (Mat_IS*)(mat->data); 2788 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2789 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2790 2791 /* build local CSR graph of subdomains' connectivity */ 2792 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2793 xadj[0] = 0; 2794 xadj[1] = PetscMax(n_neighs-1,0); 2795 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2796 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2797 2798 if (threshold) { 2799 PetscInt xadj_count = 0; 2800 for (i=1;i<n_neighs;i++) { 2801 if (n_shared[i] > threshold) { 2802 adjncy[xadj_count] = neighs[i]; 2803 adjncy_wgt[xadj_count] = n_shared[i]; 2804 xadj_count++; 2805 } 2806 } 2807 xadj[1] = xadj_count; 2808 } else { 2809 if (xadj[1]) { 2810 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2811 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2812 } 2813 } 2814 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2815 if (use_square) { 2816 for (i=0;i<xadj[1];i++) { 2817 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2818 } 2819 } 2820 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2821 2822 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 2823 2824 /* 2825 Restrict work on active processes only. 2826 */ 2827 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2828 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2829 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2830 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2831 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2832 if (color) { 2833 ierr = PetscFree(xadj);CHKERRQ(ierr); 2834 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2835 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2836 } else { 2837 PetscInt coarsening_ratio; 2838 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2839 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2840 prank = rank; 2841 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2842 /* 2843 for (i=0;i<size;i++) { 2844 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2845 } 2846 */ 2847 for (i=0;i<xadj[1];i++) { 2848 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2849 } 2850 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2851 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2852 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2853 2854 /* Partition */ 2855 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2856 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2857 if (use_vwgt) { 2858 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 2859 v_wgt[0] = local_size; 2860 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2861 } 2862 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2863 coarsening_ratio = size/n_subdomains; 2864 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2865 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2866 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2867 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2868 2869 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2870 if (contiguous) { 2871 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2872 } else { 2873 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2874 } 2875 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2876 /* clean up */ 2877 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2878 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2879 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2880 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2881 } 2882 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2883 2884 /* assemble parallel IS for sends */ 2885 i = 1; 2886 if (color) i=0; 2887 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2888 2889 /* get back IS */ 2890 *is_sends = ranks_send_to; 2891 PetscFunctionReturn(0); 2892 } 2893 2894 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2895 2896 #undef __FUNCT__ 2897 #define __FUNCT__ "MatISSubassemble" 2898 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2899 { 2900 Mat local_mat; 2901 Mat_IS *matis; 2902 IS is_sends_internal; 2903 PetscInt rows,cols,new_local_rows; 2904 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2905 PetscBool ismatis,isdense,newisdense,destroy_mat; 2906 ISLocalToGlobalMapping l2gmap; 2907 PetscInt* l2gmap_indices; 2908 const PetscInt* is_indices; 2909 MatType new_local_type; 2910 /* buffers */ 2911 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2912 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2913 PetscInt *recv_buffer_idxs_local; 2914 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2915 /* MPI */ 2916 MPI_Comm comm,comm_n; 2917 PetscSubcomm subcomm; 2918 PetscMPIInt n_sends,n_recvs,commsize; 2919 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2920 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2921 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2922 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2923 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2924 PetscErrorCode ierr; 2925 2926 PetscFunctionBegin; 2927 /* TODO: add missing checks */ 2928 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2929 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2930 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2931 PetscValidLogicalCollectiveInt(mat,nis,7); 2932 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2933 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2934 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2935 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2936 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2937 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2938 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2939 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2940 PetscInt mrows,mcols,mnrows,mncols; 2941 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2942 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2943 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2944 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2945 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2946 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2947 } 2948 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2949 PetscValidLogicalCollectiveInt(mat,bs,0); 2950 /* prepare IS for sending if not provided */ 2951 if (!is_sends) { 2952 PetscBool pcontig = PETSC_TRUE; 2953 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 2954 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 2955 } else { 2956 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2957 is_sends_internal = is_sends; 2958 } 2959 2960 /* get pointer of MATIS data */ 2961 matis = (Mat_IS*)mat->data; 2962 2963 /* get comm */ 2964 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2965 2966 /* compute number of sends */ 2967 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2968 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2969 2970 /* compute number of receives */ 2971 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2972 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 2973 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2974 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2975 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2976 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2977 ierr = PetscFree(iflags);CHKERRQ(ierr); 2978 2979 /* restrict comm if requested */ 2980 subcomm = 0; 2981 destroy_mat = PETSC_FALSE; 2982 if (restrict_comm) { 2983 PetscMPIInt color,rank,subcommsize; 2984 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2985 color = 0; 2986 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 2987 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2988 subcommsize = commsize - subcommsize; 2989 /* check if reuse has been requested */ 2990 if (reuse == MAT_REUSE_MATRIX) { 2991 if (*mat_n) { 2992 PetscMPIInt subcommsize2; 2993 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 2994 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 2995 comm_n = PetscObjectComm((PetscObject)*mat_n); 2996 } else { 2997 comm_n = PETSC_COMM_SELF; 2998 } 2999 } else { /* MAT_INITIAL_MATRIX */ 3000 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3001 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3002 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3003 comm_n = subcomm->comm; 3004 } 3005 /* flag to destroy *mat_n if not significative */ 3006 if (color) destroy_mat = PETSC_TRUE; 3007 } else { 3008 comm_n = comm; 3009 } 3010 3011 /* prepare send/receive buffers */ 3012 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3013 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3014 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3015 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3016 if (nis) { 3017 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 3018 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 3019 } 3020 3021 /* Get data from local matrices */ 3022 if (!isdense) { 3023 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3024 /* TODO: See below some guidelines on how to prepare the local buffers */ 3025 /* 3026 send_buffer_vals should contain the raw values of the local matrix 3027 send_buffer_idxs should contain: 3028 - MatType_PRIVATE type 3029 - PetscInt size_of_l2gmap 3030 - PetscInt global_row_indices[size_of_l2gmap] 3031 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3032 */ 3033 } else { 3034 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3035 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3036 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 3037 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3038 send_buffer_idxs[1] = i; 3039 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3040 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3041 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3042 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3043 for (i=0;i<n_sends;i++) { 3044 ilengths_vals[is_indices[i]] = len*len; 3045 ilengths_idxs[is_indices[i]] = len+2; 3046 } 3047 } 3048 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3049 /* additional is (if any) */ 3050 if (nis) { 3051 PetscMPIInt psum; 3052 PetscInt j; 3053 for (j=0,psum=0;j<nis;j++) { 3054 PetscInt plen; 3055 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3056 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3057 psum += len+1; /* indices + lenght */ 3058 } 3059 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 3060 for (j=0,psum=0;j<nis;j++) { 3061 PetscInt plen; 3062 const PetscInt *is_array_idxs; 3063 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3064 send_buffer_idxs_is[psum] = plen; 3065 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3066 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3067 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3068 psum += plen+1; /* indices + lenght */ 3069 } 3070 for (i=0;i<n_sends;i++) { 3071 ilengths_idxs_is[is_indices[i]] = psum; 3072 } 3073 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3074 } 3075 3076 buf_size_idxs = 0; 3077 buf_size_vals = 0; 3078 buf_size_idxs_is = 0; 3079 for (i=0;i<n_recvs;i++) { 3080 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3081 buf_size_vals += (PetscInt)olengths_vals[i]; 3082 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3083 } 3084 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3085 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3086 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3087 3088 /* get new tags for clean communications */ 3089 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3090 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3091 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3092 3093 /* allocate for requests */ 3094 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3095 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3096 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3097 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3098 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3099 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3100 3101 /* communications */ 3102 ptr_idxs = recv_buffer_idxs; 3103 ptr_vals = recv_buffer_vals; 3104 ptr_idxs_is = recv_buffer_idxs_is; 3105 for (i=0;i<n_recvs;i++) { 3106 source_dest = onodes[i]; 3107 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3108 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3109 ptr_idxs += olengths_idxs[i]; 3110 ptr_vals += olengths_vals[i]; 3111 if (nis) { 3112 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3113 ptr_idxs_is += olengths_idxs_is[i]; 3114 } 3115 } 3116 for (i=0;i<n_sends;i++) { 3117 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3118 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3119 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3120 if (nis) { 3121 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3122 } 3123 } 3124 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3125 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3126 3127 /* assemble new l2g map */ 3128 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3129 ptr_idxs = recv_buffer_idxs; 3130 new_local_rows = 0; 3131 for (i=0;i<n_recvs;i++) { 3132 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3133 ptr_idxs += olengths_idxs[i]; 3134 } 3135 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3136 ptr_idxs = recv_buffer_idxs; 3137 new_local_rows = 0; 3138 for (i=0;i<n_recvs;i++) { 3139 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3140 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3141 ptr_idxs += olengths_idxs[i]; 3142 } 3143 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3144 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3145 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3146 3147 /* infer new local matrix type from received local matrices type */ 3148 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3149 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3150 if (n_recvs) { 3151 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3152 ptr_idxs = recv_buffer_idxs; 3153 for (i=0;i<n_recvs;i++) { 3154 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3155 new_local_type_private = MATAIJ_PRIVATE; 3156 break; 3157 } 3158 ptr_idxs += olengths_idxs[i]; 3159 } 3160 switch (new_local_type_private) { 3161 case MATDENSE_PRIVATE: 3162 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3163 new_local_type = MATSEQAIJ; 3164 bs = 1; 3165 } else { /* if I receive only 1 dense matrix */ 3166 new_local_type = MATSEQDENSE; 3167 bs = 1; 3168 } 3169 break; 3170 case MATAIJ_PRIVATE: 3171 new_local_type = MATSEQAIJ; 3172 bs = 1; 3173 break; 3174 case MATBAIJ_PRIVATE: 3175 new_local_type = MATSEQBAIJ; 3176 break; 3177 case MATSBAIJ_PRIVATE: 3178 new_local_type = MATSEQSBAIJ; 3179 break; 3180 default: 3181 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3182 break; 3183 } 3184 } else { /* by default, new_local_type is seqdense */ 3185 new_local_type = MATSEQDENSE; 3186 bs = 1; 3187 } 3188 3189 /* create MATIS object if needed */ 3190 if (reuse == MAT_INITIAL_MATRIX) { 3191 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3192 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3193 } else { 3194 /* it also destroys the local matrices */ 3195 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3196 } 3197 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3198 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3199 3200 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3201 3202 /* Global to local map of received indices */ 3203 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3204 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3205 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3206 3207 /* restore attributes -> type of incoming data and its size */ 3208 buf_size_idxs = 0; 3209 for (i=0;i<n_recvs;i++) { 3210 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3211 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3212 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3213 } 3214 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3215 3216 /* set preallocation */ 3217 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3218 if (!newisdense) { 3219 PetscInt *new_local_nnz=0; 3220 3221 ptr_vals = recv_buffer_vals; 3222 ptr_idxs = recv_buffer_idxs_local; 3223 if (n_recvs) { 3224 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3225 } 3226 for (i=0;i<n_recvs;i++) { 3227 PetscInt j; 3228 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3229 for (j=0;j<*(ptr_idxs+1);j++) { 3230 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3231 } 3232 } else { 3233 /* TODO */ 3234 } 3235 ptr_idxs += olengths_idxs[i]; 3236 } 3237 if (new_local_nnz) { 3238 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3239 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3240 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3241 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3242 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3243 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3244 } else { 3245 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3246 } 3247 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3248 } else { 3249 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3250 } 3251 3252 /* set values */ 3253 ptr_vals = recv_buffer_vals; 3254 ptr_idxs = recv_buffer_idxs_local; 3255 for (i=0;i<n_recvs;i++) { 3256 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3257 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3258 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3259 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3260 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3261 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3262 } else { 3263 /* TODO */ 3264 } 3265 ptr_idxs += olengths_idxs[i]; 3266 ptr_vals += olengths_vals[i]; 3267 } 3268 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3269 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3270 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3271 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3272 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3273 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3274 3275 #if 0 3276 if (!restrict_comm) { /* check */ 3277 Vec lvec,rvec; 3278 PetscReal infty_error; 3279 3280 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3281 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3282 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3283 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3284 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3285 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3286 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3287 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3288 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3289 } 3290 #endif 3291 3292 /* assemble new additional is (if any) */ 3293 if (nis) { 3294 PetscInt **temp_idxs,*count_is,j,psum; 3295 3296 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3297 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3298 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3299 ptr_idxs = recv_buffer_idxs_is; 3300 psum = 0; 3301 for (i=0;i<n_recvs;i++) { 3302 for (j=0;j<nis;j++) { 3303 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3304 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3305 psum += plen; 3306 ptr_idxs += plen+1; /* shift pointer to received data */ 3307 } 3308 } 3309 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3310 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3311 for (i=1;i<nis;i++) { 3312 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3313 } 3314 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3315 ptr_idxs = recv_buffer_idxs_is; 3316 for (i=0;i<n_recvs;i++) { 3317 for (j=0;j<nis;j++) { 3318 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3319 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3320 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3321 ptr_idxs += plen+1; /* shift pointer to received data */ 3322 } 3323 } 3324 for (i=0;i<nis;i++) { 3325 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3326 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3327 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3328 } 3329 ierr = PetscFree(count_is);CHKERRQ(ierr); 3330 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3331 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3332 } 3333 /* free workspace */ 3334 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3335 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3336 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3337 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3338 if (isdense) { 3339 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3340 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3341 } else { 3342 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3343 } 3344 if (nis) { 3345 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3346 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3347 } 3348 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3349 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3350 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3351 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3352 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3353 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3354 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3355 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3356 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3357 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3358 ierr = PetscFree(onodes);CHKERRQ(ierr); 3359 if (nis) { 3360 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3361 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3362 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3363 } 3364 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3365 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3366 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3367 for (i=0;i<nis;i++) { 3368 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3369 } 3370 } 3371 PetscFunctionReturn(0); 3372 } 3373 3374 /* temporary hack into ksp private data structure */ 3375 #include <petsc-private/kspimpl.h> 3376 3377 #undef __FUNCT__ 3378 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3379 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3380 { 3381 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3382 PC_IS *pcis = (PC_IS*)pc->data; 3383 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3384 MatNullSpace CoarseNullSpace=NULL; 3385 ISLocalToGlobalMapping coarse_islg; 3386 IS coarse_is,*isarray; 3387 PetscInt i,im_active=-1,active_procs=-1; 3388 PetscInt nis,nisdofs,nisneu; 3389 PC pc_temp; 3390 PCType coarse_pc_type; 3391 KSPType coarse_ksp_type; 3392 PetscBool multilevel_requested,multilevel_allowed; 3393 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3394 Mat t_coarse_mat_is; 3395 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3396 PetscMPIInt all_procs; 3397 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3398 PetscBool compute_vecs = PETSC_FALSE; 3399 PetscScalar *array; 3400 PetscErrorCode ierr; 3401 3402 PetscFunctionBegin; 3403 /* Assign global numbering to coarse dofs */ 3404 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3405 compute_vecs = PETSC_TRUE; 3406 PetscInt ocoarse_size; 3407 ocoarse_size = pcbddc->coarse_size; 3408 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3409 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3410 /* see if we can avoid some work */ 3411 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3412 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3413 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3414 coarse_reuse = PETSC_FALSE; 3415 } else { /* we can safely reuse already computed coarse matrix */ 3416 coarse_reuse = PETSC_TRUE; 3417 } 3418 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3419 coarse_reuse = PETSC_FALSE; 3420 } 3421 /* reset any subassembling information */ 3422 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3423 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3424 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3425 coarse_reuse = PETSC_TRUE; 3426 } 3427 3428 /* count "active" (i.e. with positive local size) and "void" processes */ 3429 im_active = !!(pcis->n); 3430 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3431 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3432 void_procs = all_procs-active_procs; 3433 csin_type_simple = PETSC_TRUE; 3434 redist = PETSC_FALSE; 3435 if (pcbddc->current_level && void_procs) { 3436 csin_ml = PETSC_TRUE; 3437 ncoarse_ml = void_procs; 3438 csin_ds = PETSC_TRUE; 3439 ncoarse_ds = void_procs; 3440 } else { 3441 csin_ml = PETSC_FALSE; 3442 ncoarse_ml = all_procs; 3443 if (void_procs) { 3444 csin_ds = PETSC_TRUE; 3445 ncoarse_ds = void_procs; 3446 csin_type_simple = PETSC_FALSE; 3447 } else { 3448 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3449 csin_ds = PETSC_TRUE; 3450 ncoarse_ds = pcbddc->redistribute_coarse; 3451 redist = PETSC_TRUE; 3452 } else { 3453 csin_ds = PETSC_FALSE; 3454 ncoarse_ds = all_procs; 3455 } 3456 } 3457 } 3458 3459 /* 3460 test if we can go multilevel: three conditions must be satisfied: 3461 - we have not exceeded the number of levels requested 3462 - we can actually subassemble the active processes 3463 - we can find a suitable number of MPI processes where we can place the subassembled problem 3464 */ 3465 multilevel_allowed = PETSC_FALSE; 3466 multilevel_requested = PETSC_FALSE; 3467 if (pcbddc->current_level < pcbddc->max_levels) { 3468 multilevel_requested = PETSC_TRUE; 3469 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3470 multilevel_allowed = PETSC_FALSE; 3471 } else { 3472 multilevel_allowed = PETSC_TRUE; 3473 } 3474 } 3475 /* determine number of process partecipating to coarse solver */ 3476 if (multilevel_allowed) { 3477 ncoarse = ncoarse_ml; 3478 csin = csin_ml; 3479 } else { 3480 ncoarse = ncoarse_ds; 3481 csin = csin_ds; 3482 } 3483 3484 /* creates temporary l2gmap and IS for coarse indexes */ 3485 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3486 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3487 3488 /* creates temporary MATIS object for coarse matrix */ 3489 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3490 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3491 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3492 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3493 #if 0 3494 { 3495 PetscViewer viewer; 3496 char filename[256]; 3497 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3498 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3499 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3500 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3501 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3502 } 3503 #endif 3504 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3505 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3506 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3507 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3508 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3509 3510 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3511 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3512 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3513 const PetscInt *idxs; 3514 ISLocalToGlobalMapping tmap; 3515 3516 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3517 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3518 /* allocate space for temporary storage */ 3519 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3520 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3521 /* allocate for IS array */ 3522 nisdofs = pcbddc->n_ISForDofsLocal; 3523 nisneu = !!pcbddc->NeumannBoundariesLocal; 3524 nis = nisdofs + nisneu; 3525 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3526 /* dofs splitting */ 3527 for (i=0;i<nisdofs;i++) { 3528 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3529 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3530 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3531 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3532 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3533 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3534 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3535 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3536 } 3537 /* neumann boundaries */ 3538 if (pcbddc->NeumannBoundariesLocal) { 3539 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3540 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3541 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3542 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3543 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3544 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3545 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3546 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3547 } 3548 /* free memory */ 3549 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3550 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3551 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3552 } else { 3553 nis = 0; 3554 nisdofs = 0; 3555 nisneu = 0; 3556 isarray = NULL; 3557 } 3558 /* destroy no longer needed map */ 3559 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3560 3561 /* restrict on coarse candidates (if needed) */ 3562 coarse_mat_is = NULL; 3563 if (csin) { 3564 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3565 if (redist) { 3566 PetscMPIInt rank; 3567 PetscInt spc,n_spc_p1,dest[1]; 3568 3569 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3570 spc = all_procs/pcbddc->redistribute_coarse; 3571 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 3572 if (rank > n_spc_p1*(spc+1)-1) { 3573 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3574 } else { 3575 dest[0] = rank/(spc+1); 3576 } 3577 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3578 ierr = ISView(pcbddc->coarse_subassembling_init,NULL);CHKERRQ(ierr); 3579 } else { 3580 PetscInt j,tissize,*nisindices; 3581 PetscInt *coarse_candidates; 3582 const PetscInt* tisindices; 3583 /* get coarse candidates' ranks in pc communicator */ 3584 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3585 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3586 for (i=0,j=0;i<all_procs;i++) { 3587 if (!coarse_candidates[i]) { 3588 coarse_candidates[j]=i; 3589 j++; 3590 } 3591 } 3592 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3593 /* get a suitable subassembling pattern */ 3594 if (csin_type_simple) { 3595 PetscMPIInt rank; 3596 PetscInt issize,isidx; 3597 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3598 if (im_active) { 3599 issize = 1; 3600 isidx = (PetscInt)rank; 3601 } else { 3602 issize = 0; 3603 isidx = -1; 3604 } 3605 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3606 } else { 3607 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3608 } 3609 if (pcbddc->dbg_flag) { 3610 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3611 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3612 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3613 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3614 for (i=0;i<j;i++) { 3615 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3616 } 3617 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3618 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3619 } 3620 /* shift the pattern on coarse candidates */ 3621 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3622 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3623 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3624 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3625 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3626 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3627 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3628 } 3629 } 3630 if (pcbddc->dbg_flag) { 3631 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3632 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3633 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3634 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3635 } 3636 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3637 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3638 } else { 3639 if (pcbddc->dbg_flag) { 3640 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3641 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3642 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3643 } 3644 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3645 coarse_mat_is = t_coarse_mat_is; 3646 } 3647 3648 /* create local to global scatters for coarse problem */ 3649 if (compute_vecs) { 3650 PetscInt lrows; 3651 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3652 if (coarse_mat_is) { 3653 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3654 } else { 3655 lrows = 0; 3656 } 3657 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3658 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3659 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3660 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3661 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3662 } 3663 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3664 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3665 3666 /* set defaults for coarse KSP and PC */ 3667 if (multilevel_allowed) { 3668 coarse_ksp_type = KSPRICHARDSON; 3669 coarse_pc_type = PCBDDC; 3670 } else { 3671 coarse_ksp_type = KSPPREONLY; 3672 coarse_pc_type = PCREDUNDANT; 3673 } 3674 3675 /* print some info if requested */ 3676 if (pcbddc->dbg_flag) { 3677 if (!multilevel_allowed) { 3678 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3679 if (multilevel_requested) { 3680 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3681 } else if (pcbddc->max_levels) { 3682 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3683 } 3684 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3685 } 3686 } 3687 3688 /* create the coarse KSP object only once with defaults */ 3689 if (coarse_mat_is) { 3690 MatReuse coarse_mat_reuse; 3691 PetscViewer dbg_viewer = NULL; 3692 if (pcbddc->dbg_flag) { 3693 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3694 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3695 } 3696 if (!pcbddc->coarse_ksp) { 3697 char prefix[256],str_level[16]; 3698 size_t len; 3699 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3700 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3701 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3702 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3703 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3704 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3705 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3706 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3707 /* prefix */ 3708 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3709 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3710 if (!pcbddc->current_level) { 3711 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3712 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3713 } else { 3714 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3715 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3716 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3717 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3718 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3719 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3720 } 3721 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3722 /* allow user customization */ 3723 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3724 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3725 } 3726 3727 /* get some info after set from options */ 3728 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3729 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3730 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3731 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3732 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3733 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3734 isbddc = PETSC_FALSE; 3735 } 3736 if (isredundant) { 3737 KSP inner_ksp; 3738 PC inner_pc; 3739 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3740 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3741 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3742 } 3743 3744 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3745 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3746 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3747 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3748 if (nisdofs) { 3749 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3750 for (i=0;i<nisdofs;i++) { 3751 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3752 } 3753 } 3754 if (nisneu) { 3755 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3756 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3757 } 3758 3759 /* assemble coarse matrix */ 3760 if (coarse_reuse) { 3761 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3762 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3763 coarse_mat_reuse = MAT_REUSE_MATRIX; 3764 } else { 3765 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3766 } 3767 if (isbddc || isnn) { 3768 if (pcbddc->coarsening_ratio > 1) { 3769 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3770 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3771 if (pcbddc->dbg_flag) { 3772 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3773 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3774 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3775 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3776 } 3777 } 3778 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3779 } else { 3780 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 3781 coarse_mat = coarse_mat_is; 3782 } 3783 } else { 3784 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3785 } 3786 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3787 3788 /* propagate symmetry info to coarse matrix */ 3789 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3790 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3791 3792 /* set operators */ 3793 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3794 if (pcbddc->dbg_flag) { 3795 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3796 } 3797 } else { /* processes non partecipating to coarse solver (if any) */ 3798 coarse_mat = 0; 3799 } 3800 ierr = PetscFree(isarray);CHKERRQ(ierr); 3801 #if 0 3802 { 3803 PetscViewer viewer; 3804 char filename[256]; 3805 sprintf(filename,"coarse_mat.m"); 3806 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3807 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3808 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3809 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3810 } 3811 #endif 3812 3813 /* Compute coarse null space (special handling by BDDC only) */ 3814 if (pcbddc->NullSpace) { 3815 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3816 } 3817 3818 if (pcbddc->coarse_ksp) { 3819 Vec crhs,csol; 3820 PetscBool ispreonly; 3821 if (CoarseNullSpace) { 3822 if (isbddc) { 3823 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3824 } else { 3825 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3826 } 3827 } 3828 /* setup coarse ksp */ 3829 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3830 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3831 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3832 /* hack */ 3833 if (!csol) { 3834 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3835 } 3836 if (!crhs) { 3837 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3838 } 3839 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3840 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3841 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3842 KSP check_ksp; 3843 KSPType check_ksp_type; 3844 PC check_pc; 3845 Vec check_vec,coarse_vec; 3846 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3847 PetscInt its; 3848 PetscBool compute_eigs; 3849 PetscReal *eigs_r,*eigs_c; 3850 PetscInt neigs; 3851 const char *prefix; 3852 3853 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3854 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3855 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3856 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3857 if (ispreonly) { 3858 check_ksp_type = KSPPREONLY; 3859 compute_eigs = PETSC_FALSE; 3860 } else { 3861 check_ksp_type = KSPGMRES; 3862 compute_eigs = PETSC_TRUE; 3863 } 3864 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3865 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3866 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3867 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3868 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3869 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3870 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3871 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3872 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3873 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3874 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3875 /* create random vec */ 3876 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3877 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3878 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3879 if (CoarseNullSpace) { 3880 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3881 } 3882 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3883 /* solve coarse problem */ 3884 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3885 if (CoarseNullSpace) { 3886 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3887 } 3888 /* set eigenvalue estimation if preonly has not been requested */ 3889 if (compute_eigs) { 3890 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3891 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3892 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3893 lambda_max = eigs_r[neigs-1]; 3894 lambda_min = eigs_r[0]; 3895 if (pcbddc->use_coarse_estimates) { 3896 if (lambda_max>lambda_min) { 3897 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3898 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3899 } 3900 } 3901 } 3902 3903 /* check coarse problem residual error */ 3904 if (pcbddc->dbg_flag) { 3905 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3906 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3907 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3908 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3909 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3910 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3911 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3912 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3913 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3914 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3915 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3916 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3917 if (compute_eigs) { 3918 PetscReal lambda_max_s,lambda_min_s; 3919 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 3920 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3921 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3922 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3923 for (i=0;i<neigs;i++) { 3924 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3925 } 3926 } 3927 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3928 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3929 } 3930 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3931 if (compute_eigs) { 3932 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3933 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3934 } 3935 } 3936 } 3937 /* print additional info */ 3938 if (pcbddc->dbg_flag) { 3939 /* waits until all processes reaches this point */ 3940 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3941 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3942 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3943 } 3944 3945 /* free memory */ 3946 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3947 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3948 PetscFunctionReturn(0); 3949 } 3950 3951 #undef __FUNCT__ 3952 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3953 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3954 { 3955 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3956 PC_IS* pcis = (PC_IS*)pc->data; 3957 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3958 PetscInt i,coarse_size; 3959 PetscInt *local_primal_indices; 3960 PetscErrorCode ierr; 3961 3962 PetscFunctionBegin; 3963 /* Compute global number of coarse dofs */ 3964 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3965 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3966 } 3967 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3968 3969 /* check numbering */ 3970 if (pcbddc->dbg_flag) { 3971 PetscScalar coarsesum,*array; 3972 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 3973 3974 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3975 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3976 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3977 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3978 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3979 for (i=0;i<pcbddc->local_primal_size;i++) { 3980 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3981 } 3982 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3983 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3984 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3985 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3986 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3987 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3988 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3989 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3990 for (i=0;i<pcis->n;i++) { 3991 if (array[i] == 1.0) { 3992 set_error = PETSC_TRUE; 3993 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3994 } 3995 } 3996 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3997 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3998 for (i=0;i<pcis->n;i++) { 3999 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4000 } 4001 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4002 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4003 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4004 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4005 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4006 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4007 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4008 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4009 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4010 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4011 for (i=0;i<pcbddc->local_primal_size;i++) { 4012 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4013 } 4014 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4015 } 4016 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4017 if (set_error_reduced) { 4018 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4019 } 4020 } 4021 /* get back data */ 4022 *coarse_size_n = coarse_size; 4023 *local_primal_indices_n = local_primal_indices; 4024 PetscFunctionReturn(0); 4025 } 4026 4027 #undef __FUNCT__ 4028 #define __FUNCT__ "PCBDDCGlobalToLocal" 4029 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4030 { 4031 IS localis_t; 4032 PetscInt i,lsize,*idxs,n; 4033 PetscScalar *vals; 4034 PetscErrorCode ierr; 4035 4036 PetscFunctionBegin; 4037 /* get indices in local ordering exploiting local to global map */ 4038 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4039 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 4040 for (i=0;i<lsize;i++) vals[i] = 1.0; 4041 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4042 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4043 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4044 if (idxs) { /* multilevel guard */ 4045 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4046 } 4047 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4048 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4049 ierr = PetscFree(vals);CHKERRQ(ierr); 4050 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4051 /* now compute set in local ordering */ 4052 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4053 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4054 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4055 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4056 for (i=0,lsize=0;i<n;i++) { 4057 if (PetscRealPart(vals[i]) > 0.5) { 4058 lsize++; 4059 } 4060 } 4061 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 4062 for (i=0,lsize=0;i<n;i++) { 4063 if (PetscRealPart(vals[i]) > 0.5) { 4064 idxs[lsize++] = i; 4065 } 4066 } 4067 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4068 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4069 *localis = localis_t; 4070 PetscFunctionReturn(0); 4071 } 4072 4073 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4074 #undef __FUNCT__ 4075 #define __FUNCT__ "PCBDDCMatMult_Private" 4076 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4077 { 4078 PCBDDCChange_ctx change_ctx; 4079 PetscErrorCode ierr; 4080 4081 PetscFunctionBegin; 4082 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4083 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4084 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4085 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4086 PetscFunctionReturn(0); 4087 } 4088 4089 #undef __FUNCT__ 4090 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4091 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4092 { 4093 PCBDDCChange_ctx change_ctx; 4094 PetscErrorCode ierr; 4095 4096 PetscFunctionBegin; 4097 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4098 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4099 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4100 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4101 PetscFunctionReturn(0); 4102 } 4103