1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscInt i; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 54 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 55 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 for (i=0;i<pcbddc->n_ISForDofs;i++) { 59 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 60 } 61 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 62 pcbddc->n_ISForDofs = 0; 63 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 64 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 65 PetscFunctionReturn(0); 66 } 67 68 #undef __FUNCT__ 69 #define __FUNCT__ "PCBDDCResetTopography" 70 PetscErrorCode PCBDDCResetTopography(PC pc) 71 { 72 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 77 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 78 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "PCBDDCResetSolvers" 84 PetscErrorCode PCBDDCResetSolvers(PC pc) 85 { 86 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 87 PetscErrorCode ierr; 88 89 PetscFunctionBegin; 90 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 91 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 97 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 99 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 111 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 118 { 119 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 120 PC_IS *pcis = (PC_IS*)pc->data; 121 VecType impVecType; 122 PetscInt n_constraints,n_R,old_size; 123 PetscErrorCode ierr; 124 125 PetscFunctionBegin; 126 if (!pcbddc->ConstraintMatrix) { 127 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 128 } 129 /* get sizes */ 130 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 131 n_R = pcis->n-pcbddc->n_actual_vertices; 132 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 133 /* local work vectors (try to avoid unneeded work)*/ 134 /* R nodes */ 135 old_size = -1; 136 if (pcbddc->vec1_R) { 137 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 138 } 139 if (n_R != old_size) { 140 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 141 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 145 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 146 } 147 /* local primal dofs */ 148 old_size = -1; 149 if (pcbddc->vec1_P) { 150 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 151 } 152 if (pcbddc->local_primal_size != old_size) { 153 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 156 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 157 } 158 /* local explicit constraints */ 159 old_size = -1; 160 if (pcbddc->vec1_C) { 161 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 162 } 163 if (n_constraints && n_constraints != old_size) { 164 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 167 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCBDDCSetUpCorrection" 174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 175 { 176 PetscErrorCode ierr; 177 /* pointers to pcis and pcbddc */ 178 PC_IS* pcis = (PC_IS*)pc->data; 179 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 180 /* submatrices of local problem */ 181 Mat A_RV,A_VR,A_VV; 182 /* working matrices */ 183 Mat M1,M2,M3,C_CR; 184 /* working vectors */ 185 Vec vec1_C,vec2_C,vec1_V,vec2_V; 186 /* additional working stuff */ 187 IS is_aux; 188 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 189 const PetscScalar *array,*row_cmat_values; 190 const PetscInt *row_cmat_indices,*idx_R_local; 191 PetscInt *idx_V_B,*auxindices; 192 PetscInt n_vertices,n_constraints,size_of_constraint; 193 PetscInt i,j,n_R,n_D,n_B; 194 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 195 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 196 MatType impMatType; 197 /* some shortcuts to scalars */ 198 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 199 /* for debugging purposes */ 200 PetscReal *coarsefunctions_errors,*constraints_errors; 201 202 PetscFunctionBegin; 203 /* get number of vertices (corners plus constraints with change of basis) 204 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 205 n_vertices = pcbddc->n_actual_vertices; 206 n_constraints = pcbddc->local_primal_size-n_vertices; 207 /* Set Non-overlapping dimensions */ 208 n_B = pcis->n_B; n_D = pcis->n - n_B; 209 n_R = pcis->n-n_vertices; 210 211 /* Set types for local objects needed by BDDC precondtioner */ 212 impMatType = MATSEQDENSE; 213 214 /* Allocating some extra storage just to be safe */ 215 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 216 for (i=0;i<pcis->n;i++) auxindices[i]=i; 217 218 /* vertices in boundary numbering */ 219 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 220 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 221 if (i != n_vertices) { 222 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 223 } 224 225 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 226 if (n_constraints) { 227 /* see if we can save some allocations */ 228 if (pcbddc->local_auxmat2) { 229 PetscInt on_R,on_constraints; 230 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 231 if (on_R != n_R || on_constraints != n_constraints) { 232 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 233 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 234 } 235 } 236 /* work vectors */ 237 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 239 /* auxiliary matrices */ 240 if (!pcbddc->local_auxmat2) { 241 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 242 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 243 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 244 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 245 } 246 247 /* Extract constraints on R nodes: C_{CR} */ 248 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 249 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 250 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 251 252 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 253 for (i=0;i<n_constraints;i++) { 254 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 255 /* Get row of constraint matrix in R numbering */ 256 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 257 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 258 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 259 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 260 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 261 /* Solve for row of constraint matrix in R numbering */ 262 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 263 /* Set values in local_auxmat2 */ 264 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 265 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 266 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 267 } 268 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 271 272 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 273 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 274 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 275 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 276 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 277 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 278 ierr = MatSetUp(M1);CHKERRQ(ierr); 279 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 280 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 281 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 282 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 283 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 284 ierr = MatDestroy(&M2);CHKERRQ(ierr); 285 ierr = MatDestroy(&M3);CHKERRQ(ierr); 286 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 287 if (!pcbddc->local_auxmat1) { 288 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 289 } else { 290 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 291 } 292 } 293 294 /* Get submatrices from subdomain matrix */ 295 if (n_vertices) { 296 PetscInt ibs,mbs; 297 PetscBool issbaij; 298 Mat newmat; 299 300 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 301 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 302 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 303 if (ibs != mbs) { /* need to convert to SEQAIJ */ 304 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 308 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 309 } else { 310 /* this is safe */ 311 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 313 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 314 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 315 /* which of the two approaches is faster? */ 316 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 317 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 318 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 319 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 320 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 321 } else { 322 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 323 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 324 } 325 } 326 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 327 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 328 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 329 } 330 331 /* Matrix of coarse basis functions (local) */ 332 if (pcbddc->coarse_phi_B) { 333 PetscInt on_B,on_primal; 334 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 335 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 336 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 337 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 338 } 339 } 340 if (pcbddc->coarse_phi_D) { 341 PetscInt on_D,on_primal; 342 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 343 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 344 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 345 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 346 } 347 } 348 if (!pcbddc->coarse_phi_B) { 349 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 350 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 351 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 352 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 353 } 354 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 355 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 356 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 357 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 358 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 359 } 360 361 if (pcbddc->dbg_flag) { 362 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 363 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 364 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 365 } 366 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 367 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 368 369 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 370 371 /* vertices */ 372 for (i=0;i<n_vertices;i++) { 373 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 374 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 375 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 376 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 377 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 378 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 379 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 380 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 381 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 383 if (n_constraints) { 384 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 385 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 387 } 388 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 389 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 390 391 /* Set values in coarse basis function and subdomain part of coarse_mat */ 392 /* coarse basis functions */ 393 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 394 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 397 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 398 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 400 if (pcbddc->switch_static || pcbddc->dbg_flag) { 401 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 404 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 405 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 406 } 407 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 408 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 409 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 if (n_constraints) { 412 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 } 416 417 /* check */ 418 if (pcbddc->dbg_flag) { 419 /* assemble subdomain vector on local nodes */ 420 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 421 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 422 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 426 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 427 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 428 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 429 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 430 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 432 if (n_constraints) { 433 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 436 } 437 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 440 /* check saddle point solution */ 441 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 444 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 445 /* shift by the identity matrix */ 446 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 447 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 450 } 451 } 452 453 /* constraints */ 454 for (i=0;i<n_constraints;i++) { 455 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 456 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 457 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 458 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 459 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 460 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 461 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 462 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 463 if (n_vertices) { 464 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 465 } 466 /* Set values in coarse basis function and subdomain part of coarse_mat */ 467 /* coarse basis functions */ 468 j = i+n_vertices; /* don't touch this! */ 469 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 470 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 473 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 474 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 475 if (pcbddc->switch_static || pcbddc->dbg_flag) { 476 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 479 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 480 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 481 } 482 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 483 if (n_vertices) { 484 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 485 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 487 } 488 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 489 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 491 492 if (pcbddc->dbg_flag) { 493 /* assemble subdomain vector on nodes */ 494 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 495 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 496 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 498 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 499 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 500 /* assemble subdomain vector of lagrange multipliers */ 501 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 502 if (n_vertices) { 503 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 504 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 505 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 506 } 507 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 508 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 510 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 513 /* check saddle point solution */ 514 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 515 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 517 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 518 /* shift by the identity matrix */ 519 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 520 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 523 } 524 } 525 /* call assembling routines for local coarse basis */ 526 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 if (pcbddc->switch_static || pcbddc->dbg_flag) { 529 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 } 532 533 /* compute other basis functions for non-symmetric problems */ 534 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 535 if (!setsym || (setsym && !issym)) { 536 if (!pcbddc->coarse_psi_B) { 537 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 538 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 539 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 540 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 541 } 542 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 543 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 544 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 545 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 546 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 547 } 548 for (i=0;i<pcbddc->local_primal_size;i++) { 549 if (n_constraints) { 550 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 551 for (j=0;j<n_constraints;j++) { 552 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 553 } 554 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 555 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 556 } 557 if (i<n_vertices) { 558 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 559 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 560 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 561 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 562 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 563 if (n_constraints) { 564 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 565 } 566 } else { 567 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 568 } 569 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 570 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 571 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 574 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 575 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 576 if (i<n_vertices) { 577 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 578 } 579 if (pcbddc->switch_static || pcbddc->dbg_flag) { 580 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 581 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 583 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 584 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 585 } 586 587 if (pcbddc->dbg_flag) { 588 /* assemble subdomain vector on nodes */ 589 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 590 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 591 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 592 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 593 if (i<n_vertices) { 594 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 595 } 596 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 597 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 598 /* assemble subdomain vector of lagrange multipliers */ 599 for (j=0;j<pcbddc->local_primal_size;j++) { 600 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 601 } 602 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 603 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 604 /* check saddle point solution */ 605 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 606 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 608 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 609 /* shift by the identity matrix */ 610 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 611 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 612 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 614 } 615 } 616 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 if (pcbddc->switch_static || pcbddc->dbg_flag) { 619 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 } 622 } 623 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 624 /* Checking coarse_sub_mat and coarse basis functios */ 625 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 626 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 627 if (pcbddc->dbg_flag) { 628 Mat coarse_sub_mat; 629 Mat AUXMAT,TM1,TM2,TM3,TM4; 630 Mat coarse_phi_D,coarse_phi_B; 631 Mat coarse_psi_D,coarse_psi_B; 632 Mat A_II,A_BB,A_IB,A_BI; 633 MatType checkmattype=MATSEQAIJ; 634 PetscReal real_value; 635 636 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 637 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 638 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 639 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 640 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 641 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 642 if (pcbddc->coarse_psi_B) { 643 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 644 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 645 } 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 647 648 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 649 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 650 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 651 if (pcbddc->coarse_psi_B) { 652 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 653 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 654 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 655 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 656 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 657 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 658 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 659 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 660 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 661 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 662 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 663 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 664 } else { 665 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 666 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 667 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 668 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 669 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 670 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 671 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 672 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 673 } 674 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 675 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 676 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 677 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 678 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 679 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 680 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 683 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 684 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 685 for (i=0;i<pcbddc->local_primal_size;i++) { 686 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 687 } 688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 689 for (i=0;i<pcbddc->local_primal_size;i++) { 690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 691 } 692 if (pcbddc->coarse_psi_B) { 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 694 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 696 } 697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 698 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 700 } 701 } 702 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 703 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 704 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 705 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 706 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 707 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 708 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 709 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 710 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 711 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 712 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 713 if (pcbddc->coarse_psi_B) { 714 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 715 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 716 } 717 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 718 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 719 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 720 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 721 } 722 /* free memory */ 723 if (n_vertices) { 724 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 725 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 726 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 727 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 728 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 729 } 730 if (n_constraints) { 731 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 732 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 733 ierr = MatDestroy(&M1);CHKERRQ(ierr); 734 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 735 } 736 ierr = PetscFree(auxindices);CHKERRQ(ierr); 737 /* get back data */ 738 *coarse_submat_vals_n = coarse_submat_vals; 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 744 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 745 { 746 PC_IS* pcis = (PC_IS*)(pc->data); 747 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 748 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 749 PetscBool issbaij,isbaij; 750 /* manage repeated solves */ 751 MatReuse reuse; 752 PetscErrorCode ierr; 753 754 PetscFunctionBegin; 755 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 756 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 757 } 758 /* get mat flags */ 759 reuse = MAT_INITIAL_MATRIX; 760 if (pc->setupcalled) { 761 if (pc->flag == SAME_NONZERO_PATTERN) { 762 reuse = MAT_REUSE_MATRIX; 763 } else { 764 reuse = MAT_INITIAL_MATRIX; 765 } 766 } 767 if (reuse == MAT_INITIAL_MATRIX) { 768 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 769 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 770 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 771 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 772 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 773 } 774 775 /* transform local matrices if needed */ 776 if (pcbddc->use_change_of_basis) { 777 Mat change_mat_all; 778 PetscScalar *row_cmat_values; 779 PetscInt *row_cmat_indices; 780 PetscInt *nnz,*is_indices,*temp_indices; 781 PetscInt i,j,k,n_D,n_B; 782 783 /* Get Non-overlapping dimensions */ 784 n_B = pcis->n_B; 785 n_D = pcis->n-n_B; 786 787 /* compute nonzero structure of change of basis on all local nodes */ 788 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 789 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 790 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 791 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 792 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 793 k=1; 794 for (i=0;i<n_B;i++) { 795 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 796 nnz[is_indices[i]]=j; 797 if (k < j) k = j; 798 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 799 } 800 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 801 /* assemble change of basis matrix on the whole set of local dofs */ 802 ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr); 803 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 804 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 805 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 806 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 807 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 808 for (i=0;i<n_D;i++) { 809 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 810 } 811 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 812 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 813 for (i=0;i<n_B;i++) { 814 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 815 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 816 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 817 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 818 } 819 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 820 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 821 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 822 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 823 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 824 if (!issbaij && !isbaij) { 825 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 826 } else { 827 Mat work_mat; 828 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 829 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 830 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 831 } 832 /* 833 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 834 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 835 */ 836 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 837 ierr = PetscFree(nnz);CHKERRQ(ierr); 838 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 839 } else { 840 /* without change of basis, the local matrix is unchanged */ 841 if (!pcbddc->local_mat) { 842 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 843 pcbddc->local_mat = matis->A; 844 } 845 } 846 847 /* get submatrices */ 848 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 849 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 850 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 851 if (!issbaij) { 852 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 853 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 854 } else { 855 Mat newmat; 856 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 857 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 858 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 859 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 860 } 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 866 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 867 { 868 PC_IS* pcis = (PC_IS*)(pc->data); 869 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 870 IS is_aux1,is_aux2; 871 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 872 PetscInt n_vertices,i,j,n_R,n_D,n_B; 873 PetscInt vbs,bs; 874 PetscBT bitmask; 875 PetscErrorCode ierr; 876 877 PetscFunctionBegin; 878 /* No need to setup local scatters if primal space is unchanged */ 879 if (!pcbddc->new_primal_space_local) { 880 PetscFunctionReturn(0); 881 } 882 /* destroy old objects */ 883 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 884 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 885 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 886 /* Set Non-overlapping dimensions */ 887 n_B = pcis->n_B; n_D = pcis->n - n_B; 888 n_vertices = pcbddc->n_actual_vertices; 889 /* create auxiliary bitmask */ 890 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 891 for (i=0;i<n_vertices;i++) { 892 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 893 } 894 895 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 896 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 897 for (i=0, n_R=0; i<pcis->n; i++) { 898 if (!PetscBTLookup(bitmask,i)) { 899 idx_R_local[n_R] = i; 900 n_R++; 901 } 902 } 903 904 /* Block code */ 905 vbs = 1; 906 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 907 if (bs>1 && !(n_vertices%bs)) { 908 PetscBool is_blocked = PETSC_TRUE; 909 PetscInt *vary; 910 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 911 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 912 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 913 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 914 for (i=0; i<n_vertices; i++) { 915 if (vary[i]!=0 && vary[i]!=bs) { 916 is_blocked = PETSC_FALSE; 917 break; 918 } 919 } 920 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 921 vbs = bs; 922 for (i=0;i<n_R/vbs;i++) { 923 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 924 } 925 } 926 ierr = PetscFree(vary);CHKERRQ(ierr); 927 } 928 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 929 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 930 931 /* print some info if requested */ 932 if (pcbddc->dbg_flag) { 933 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 934 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 935 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 936 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 937 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 938 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 939 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 940 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 941 } 942 943 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 944 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 945 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 946 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 947 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 948 for (i=0; i<n_D; i++) { 949 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 950 } 951 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 952 for (i=0, j=0; i<n_R; i++) { 953 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 954 aux_array1[j++] = i; 955 } 956 } 957 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 958 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 959 for (i=0, j=0; i<n_B; i++) { 960 if (!PetscBTLookup(bitmask,is_indices[i])) { 961 aux_array2[j++] = i; 962 } 963 } 964 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 965 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 966 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 967 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 968 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 969 970 if (pcbddc->switch_static || pcbddc->dbg_flag) { 971 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 972 for (i=0, j=0; i<n_R; i++) { 973 if (PetscBTLookup(bitmask,idx_R_local[i])) { 974 aux_array1[j++] = i; 975 } 976 } 977 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 978 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 979 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 980 } 981 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 982 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } 985 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 989 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 990 { 991 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 992 PC_IS *pcis = (PC_IS*)pc->data; 993 PC pc_temp; 994 Mat A_RR; 995 MatReuse reuse; 996 PetscScalar m_one = -1.0; 997 PetscReal value; 998 PetscInt n_D,n_R,ibs,mbs; 999 PetscBool use_exact,use_exact_reduced,issbaij; 1000 PetscErrorCode ierr; 1001 /* prefixes stuff */ 1002 char dir_prefix[256],neu_prefix[256],str_level[3]; 1003 size_t len; 1004 1005 PetscFunctionBegin; 1006 1007 /* compute prefixes */ 1008 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1009 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1010 if (!pcbddc->current_level) { 1011 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1012 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1013 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1014 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1015 } else { 1016 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1017 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 1018 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1019 len -= 15; /* remove "pc_bddc_coarse_" */ 1020 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 1021 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1022 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1023 *(dir_prefix+len)='\0'; 1024 *(neu_prefix+len)='\0'; 1025 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1026 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1027 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1028 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1029 } 1030 1031 /* DIRICHLET PROBLEM */ 1032 /* Matrix for Dirichlet problem is pcis->A_II */ 1033 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1034 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1035 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1036 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1037 /* default */ 1038 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1039 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1040 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1041 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1042 if (issbaij) { 1043 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1044 } else { 1045 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1046 } 1047 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1048 } 1049 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1050 /* Allow user's customization */ 1051 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1052 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1053 if (!n_D) { 1054 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1055 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1056 } 1057 /* Set Up KSP for Dirichlet problem of BDDC */ 1058 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1059 /* set ksp_D into pcis data */ 1060 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1061 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1062 pcis->ksp_D = pcbddc->ksp_D; 1063 1064 /* NEUMANN PROBLEM */ 1065 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1066 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1067 if (pcbddc->ksp_R) { /* already created ksp */ 1068 PetscInt nn_R; 1069 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1070 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1071 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1072 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1073 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1074 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1075 reuse = MAT_INITIAL_MATRIX; 1076 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1077 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1078 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1079 reuse = MAT_INITIAL_MATRIX; 1080 } else { /* safe to reuse the matrix */ 1081 reuse = MAT_REUSE_MATRIX; 1082 } 1083 } 1084 /* last check */ 1085 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1086 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1087 reuse = MAT_INITIAL_MATRIX; 1088 } 1089 } else { /* first time, so we need to create the matrix */ 1090 reuse = MAT_INITIAL_MATRIX; 1091 } 1092 /* extract A_RR */ 1093 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1094 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1095 if (ibs != mbs) { 1096 Mat newmat; 1097 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1098 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1099 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1100 } else { 1101 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1102 } 1103 if (!pcbddc->ksp_R) { /* create object if not present */ 1104 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1105 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1106 /* default */ 1107 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1108 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1109 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1110 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1111 if (issbaij) { 1112 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1113 } else { 1114 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1115 } 1116 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1117 } 1118 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1119 /* Allow user's customization */ 1120 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1121 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1122 if (!n_R) { 1123 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1124 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1125 } 1126 /* Set Up KSP for Neumann problem of BDDC */ 1127 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1128 1129 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1130 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1131 /* Dirichlet */ 1132 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1133 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1134 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1135 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1136 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1137 /* need to be adapted? */ 1138 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1139 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1140 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1141 /* print info */ 1142 if (pcbddc->dbg_flag) { 1143 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1144 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1146 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1147 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1148 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1149 } 1150 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1151 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1152 } 1153 1154 /* Neumann */ 1155 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1156 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1157 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1158 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1159 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1160 /* need to be adapted? */ 1161 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1162 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1163 /* print info */ 1164 if (pcbddc->dbg_flag) { 1165 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1166 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1167 } 1168 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1169 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1170 } 1171 } 1172 /* free Neumann problem's matrix */ 1173 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1179 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1180 { 1181 PetscErrorCode ierr; 1182 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1183 1184 PetscFunctionBegin; 1185 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1186 if (pcbddc->local_auxmat1) { 1187 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1188 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1189 } 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1195 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1196 { 1197 PetscErrorCode ierr; 1198 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1199 PC_IS* pcis = (PC_IS*) (pc->data); 1200 const PetscScalar zero = 0.0; 1201 1202 PetscFunctionBegin; 1203 /* Application of PHI^T (or PSI^T) */ 1204 if (pcbddc->coarse_psi_B) { 1205 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1206 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1207 } else { 1208 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1209 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1210 } 1211 /* Scatter data of coarse_rhs */ 1212 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1213 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1214 1215 /* Local solution on R nodes */ 1216 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1217 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1218 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1219 if (pcbddc->switch_static) { 1220 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1221 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1222 } 1223 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1224 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1225 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1226 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1227 if (pcbddc->switch_static) { 1228 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 } 1231 1232 /* Coarse solution */ 1233 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1234 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1235 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1236 } 1237 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1238 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1239 1240 /* Sum contributions from two levels */ 1241 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1242 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1248 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1249 { 1250 PetscErrorCode ierr; 1251 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1252 1253 PetscFunctionBegin; 1254 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1260 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1261 { 1262 PetscErrorCode ierr; 1263 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1264 1265 PetscFunctionBegin; 1266 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1267 PetscFunctionReturn(0); 1268 } 1269 1270 /* uncomment for testing purposes */ 1271 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1272 #undef __FUNCT__ 1273 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1274 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1275 { 1276 PetscErrorCode ierr; 1277 PC_IS* pcis = (PC_IS*)(pc->data); 1278 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1279 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1280 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1281 MatType impMatType=MATSEQAIJ; 1282 /* one and zero */ 1283 PetscScalar one=1.0,zero=0.0; 1284 /* space to store constraints and their local indices */ 1285 PetscScalar *temp_quadrature_constraint; 1286 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1287 /* iterators */ 1288 PetscInt i,j,k,total_counts,temp_start_ptr; 1289 /* stuff to store connected components stored in pcbddc->mat_graph */ 1290 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1291 PetscInt n_ISForFaces,n_ISForEdges; 1292 /* near null space stuff */ 1293 MatNullSpace nearnullsp; 1294 const Vec *nearnullvecs; 1295 Vec *localnearnullsp; 1296 PetscBool nnsp_has_cnst; 1297 PetscInt nnsp_size; 1298 PetscScalar *array; 1299 /* BLAS integers */ 1300 PetscBLASInt lwork,lierr; 1301 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1302 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1303 /* LAPACK working arrays for SVD or POD */ 1304 PetscBool skip_lapack; 1305 PetscScalar *work; 1306 PetscReal *singular_vals; 1307 #if defined(PETSC_USE_COMPLEX) 1308 PetscReal *rwork; 1309 #endif 1310 #if defined(PETSC_MISSING_LAPACK_GESVD) 1311 PetscBLASInt Blas_one_2=1; 1312 PetscScalar *temp_basis,*correlation_mat; 1313 #else 1314 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1315 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1316 #endif 1317 /* reuse */ 1318 PetscInt olocal_primal_size; 1319 PetscInt *oprimal_indices_local_idxs; 1320 /* change of basis */ 1321 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1322 PetscBool boolforchange,qr_needed; 1323 PetscBT touched,change_basis,qr_needed_idx; 1324 /* auxiliary stuff */ 1325 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1326 /* some quantities */ 1327 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1328 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1329 1330 1331 PetscFunctionBegin; 1332 /* Destroy Mat objects computed previously */ 1333 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1334 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1335 /* Get index sets for faces, edges and vertices from graph */ 1336 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1337 pcbddc->use_vertices = PETSC_TRUE; 1338 } 1339 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1340 /* HACK: provide functions to set change of basis */ 1341 if (!ISForVertices && pcbddc->NullSpace) { 1342 pcbddc->use_change_of_basis = PETSC_TRUE; 1343 pcbddc->use_change_on_faces = PETSC_FALSE; 1344 } 1345 /* print some info */ 1346 if (pcbddc->dbg_flag) { 1347 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1348 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1349 i = 0; 1350 if (ISForVertices) { 1351 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1352 } 1353 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1354 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1355 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1356 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1357 } 1358 /* check if near null space is attached to global mat */ 1359 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1360 if (nearnullsp) { 1361 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1362 /* remove any stored info */ 1363 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1364 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1365 /* store information for BDDC solver reuse */ 1366 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1367 pcbddc->onearnullspace = nearnullsp; 1368 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1369 for (i=0;i<nnsp_size;i++) { 1370 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1371 } 1372 } else { /* if near null space is not provided BDDC uses constants by default */ 1373 nnsp_size = 0; 1374 nnsp_has_cnst = PETSC_TRUE; 1375 } 1376 /* get max number of constraints on a single cc */ 1377 max_constraints = nnsp_size; 1378 if (nnsp_has_cnst) max_constraints++; 1379 1380 /* 1381 Evaluate maximum storage size needed by the procedure 1382 - temp_indices will contain start index of each constraint stored as follows 1383 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1384 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1385 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1386 */ 1387 total_counts = n_ISForFaces+n_ISForEdges; 1388 total_counts *= max_constraints; 1389 n_vertices = 0; 1390 if (ISForVertices) { 1391 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1392 } 1393 total_counts += n_vertices; 1394 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1395 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1396 total_counts = 0; 1397 max_size_of_constraint = 0; 1398 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1399 if (i<n_ISForEdges) { 1400 used_IS = &ISForEdges[i]; 1401 } else { 1402 used_IS = &ISForFaces[i-n_ISForEdges]; 1403 } 1404 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1405 total_counts += j; 1406 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1407 } 1408 total_counts *= max_constraints; 1409 total_counts += n_vertices; 1410 ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr); 1411 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr); 1412 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1413 /* get local part of global near null space vectors */ 1414 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1415 for (k=0;k<nnsp_size;k++) { 1416 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1417 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1418 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1419 } 1420 1421 /* whether or not to skip lapack calls */ 1422 skip_lapack = PETSC_TRUE; 1423 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1424 1425 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1426 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1427 PetscScalar temp_work; 1428 #if defined(PETSC_MISSING_LAPACK_GESVD) 1429 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1430 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1431 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1432 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1433 #if defined(PETSC_USE_COMPLEX) 1434 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1435 #endif 1436 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1437 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1438 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1439 lwork = -1; 1440 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1441 #if !defined(PETSC_USE_COMPLEX) 1442 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1443 #else 1444 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1445 #endif 1446 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1447 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1448 #else /* on missing GESVD */ 1449 /* SVD */ 1450 PetscInt max_n,min_n; 1451 max_n = max_size_of_constraint; 1452 min_n = max_constraints; 1453 if (max_size_of_constraint < max_constraints) { 1454 min_n = max_size_of_constraint; 1455 max_n = max_constraints; 1456 } 1457 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1458 #if defined(PETSC_USE_COMPLEX) 1459 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1460 #endif 1461 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1462 lwork = -1; 1463 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1464 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1465 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1466 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1467 #if !defined(PETSC_USE_COMPLEX) 1468 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1469 #else 1470 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1471 #endif 1472 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1473 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1474 #endif /* on missing GESVD */ 1475 /* Allocate optimal workspace */ 1476 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1477 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1478 } 1479 /* Now we can loop on constraining sets */ 1480 total_counts = 0; 1481 temp_indices[0] = 0; 1482 /* vertices */ 1483 if (ISForVertices) { 1484 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1485 if (nnsp_has_cnst) { /* consider all vertices */ 1486 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1487 for (i=0;i<n_vertices;i++) { 1488 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1489 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1490 total_counts++; 1491 } 1492 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1493 PetscBool used_vertex; 1494 for (i=0;i<n_vertices;i++) { 1495 used_vertex = PETSC_FALSE; 1496 k = 0; 1497 while (!used_vertex && k<nnsp_size) { 1498 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1499 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1500 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1501 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1502 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1503 total_counts++; 1504 used_vertex = PETSC_TRUE; 1505 } 1506 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1507 k++; 1508 } 1509 } 1510 } 1511 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1512 n_vertices = total_counts; 1513 } 1514 1515 /* edges and faces */ 1516 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1517 if (i<n_ISForEdges) { 1518 used_IS = &ISForEdges[i]; 1519 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1520 } else { 1521 used_IS = &ISForFaces[i-n_ISForEdges]; 1522 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1523 } 1524 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1525 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1526 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1527 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1528 /* change of basis should not be performed on local periodic nodes */ 1529 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1530 if (nnsp_has_cnst) { 1531 PetscScalar quad_value; 1532 temp_constraints++; 1533 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1534 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1535 for (j=0;j<size_of_constraint;j++) { 1536 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1537 } 1538 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1539 total_counts++; 1540 } 1541 for (k=0;k<nnsp_size;k++) { 1542 PetscReal real_value; 1543 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1544 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1545 for (j=0;j<size_of_constraint;j++) { 1546 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1547 } 1548 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1549 /* check if array is null on the connected component */ 1550 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1551 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1552 if (real_value > 0.0) { /* keep indices and values */ 1553 temp_constraints++; 1554 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1555 total_counts++; 1556 } 1557 } 1558 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1559 valid_constraints = temp_constraints; 1560 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1561 if (!pcbddc->use_nnsp_true && temp_constraints) { 1562 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1563 1564 #if defined(PETSC_MISSING_LAPACK_GESVD) 1565 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1566 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1567 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1568 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1569 from that computed using LAPACKgesvd 1570 -> This is due to a different computation of eigenvectors in LAPACKheev 1571 -> The quality of the POD-computed basis will be the same */ 1572 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1573 /* Store upper triangular part of correlation matrix */ 1574 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1575 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1576 for (j=0;j<temp_constraints;j++) { 1577 for (k=0;k<j+1;k++) { 1578 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1579 } 1580 } 1581 /* compute eigenvalues and eigenvectors of correlation matrix */ 1582 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1583 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1584 #if !defined(PETSC_USE_COMPLEX) 1585 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1586 #else 1587 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1588 #endif 1589 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1590 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1591 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1592 j=0; 1593 while (j < temp_constraints && singular_vals[j] < tol) j++; 1594 total_counts=total_counts-j; 1595 valid_constraints = temp_constraints-j; 1596 /* scale and copy POD basis into used quadrature memory */ 1597 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1598 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1599 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1600 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1601 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1602 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1603 if (j<temp_constraints) { 1604 PetscInt ii; 1605 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1606 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1607 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1608 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1609 for (k=0;k<temp_constraints-j;k++) { 1610 for (ii=0;ii<size_of_constraint;ii++) { 1611 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1612 } 1613 } 1614 } 1615 #else /* on missing GESVD */ 1616 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1617 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1618 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1619 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1620 #if !defined(PETSC_USE_COMPLEX) 1621 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1622 #else 1623 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1624 #endif 1625 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1626 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1627 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1628 k = temp_constraints; 1629 if (k > size_of_constraint) k = size_of_constraint; 1630 j = 0; 1631 while (j < k && singular_vals[k-j-1] < tol) j++; 1632 total_counts = total_counts-temp_constraints+k-j; 1633 valid_constraints = k-j; 1634 #endif /* on missing GESVD */ 1635 } 1636 /* setting change_of_basis flag is safe now */ 1637 if (boolforchange) { 1638 for (j=0;j<valid_constraints;j++) { 1639 PetscBTSet(change_basis,total_counts-j-1); 1640 } 1641 } 1642 } 1643 /* free index sets of faces, edges and vertices */ 1644 for (i=0;i<n_ISForFaces;i++) { 1645 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1646 } 1647 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1648 for (i=0;i<n_ISForEdges;i++) { 1649 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1650 } 1651 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1652 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1653 /* map temp_indices_to_constraint in boundary numbering */ 1654 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1655 if (i != temp_indices[total_counts]) { 1656 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1657 } 1658 1659 /* free workspace */ 1660 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1661 ierr = PetscFree(work);CHKERRQ(ierr); 1662 #if defined(PETSC_USE_COMPLEX) 1663 ierr = PetscFree(rwork);CHKERRQ(ierr); 1664 #endif 1665 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1666 #if defined(PETSC_MISSING_LAPACK_GESVD) 1667 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1668 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1669 #endif 1670 } 1671 for (k=0;k<nnsp_size;k++) { 1672 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1673 } 1674 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1675 1676 /* set quantities in pcbddc data structure and store previous primal size */ 1677 /* n_vertices defines the number of subdomain corners in the primal space */ 1678 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1679 olocal_primal_size = pcbddc->local_primal_size; 1680 pcbddc->local_primal_size = total_counts; 1681 pcbddc->n_vertices = n_vertices; 1682 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1683 1684 /* Create constraint matrix */ 1685 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1686 /* so we need to set it up properly either with or without change of basis */ 1687 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1688 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1689 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1690 /* array to compute a local numbering of constraints : vertices first then constraints */ 1691 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1692 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1693 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1694 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1695 /* auxiliary stuff for basis change */ 1696 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1697 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1698 1699 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1700 total_primal_vertices=0; 1701 for (i=0;i<pcbddc->local_primal_size;i++) { 1702 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1703 if (size_of_constraint == 1) { 1704 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1705 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1706 aux_primal_minloc[total_primal_vertices]=0; 1707 total_primal_vertices++; 1708 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1709 PetscInt min_loc,min_index; 1710 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1711 /* find first untouched local node */ 1712 k = 0; 1713 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1714 min_index = global_indices[k]; 1715 min_loc = k; 1716 /* search the minimum among global nodes already untouched on the cc */ 1717 for (k=1;k<size_of_constraint;k++) { 1718 /* there can be more than one constraint on a single connected component */ 1719 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1720 min_index = global_indices[k]; 1721 min_loc = k; 1722 } 1723 } 1724 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1725 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1726 aux_primal_minloc[total_primal_vertices]=min_loc; 1727 total_primal_vertices++; 1728 } 1729 } 1730 /* determine if a QR strategy is needed for change of basis */ 1731 qr_needed = PETSC_FALSE; 1732 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1733 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1734 if (PetscBTLookup(change_basis,i)) { 1735 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1736 j = 0; 1737 for (k=0;k<size_of_constraint;k++) { 1738 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1739 j++; 1740 } 1741 } 1742 /* found more than one primal dof on the cc */ 1743 if (j > 1) { 1744 PetscBTSet(qr_needed_idx,i); 1745 qr_needed = PETSC_TRUE; 1746 } 1747 } 1748 } 1749 /* free workspace */ 1750 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1751 1752 /* permute indices in order to have a sorted set of vertices */ 1753 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1754 1755 /* nonzero structure of constraint matrix */ 1756 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1757 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1758 j=total_primal_vertices; 1759 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1760 if (!PetscBTLookup(change_basis,i)) { 1761 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1762 j++; 1763 } 1764 } 1765 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1766 ierr = PetscFree(nnz);CHKERRQ(ierr); 1767 /* set values in constraint matrix */ 1768 for (i=0;i<total_primal_vertices;i++) { 1769 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1770 } 1771 total_counts = total_primal_vertices; 1772 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1773 if (!PetscBTLookup(change_basis,i)) { 1774 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1775 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1776 total_counts++; 1777 } 1778 } 1779 /* assembling */ 1780 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1781 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1782 /* 1783 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1784 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1785 */ 1786 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1787 if (pcbddc->use_change_of_basis) { 1788 /* dual and primal dofs on a single cc */ 1789 PetscInt dual_dofs,primal_dofs; 1790 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1791 PetscInt primal_counter; 1792 /* working stuff for GEQRF */ 1793 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1794 PetscBLASInt lqr_work; 1795 /* working stuff for UNGQR */ 1796 PetscScalar *gqr_work,lgqr_work_t; 1797 PetscBLASInt lgqr_work; 1798 /* working stuff for TRTRS */ 1799 PetscScalar *trs_rhs; 1800 PetscBLASInt Blas_NRHS; 1801 /* pointers for values insertion into change of basis matrix */ 1802 PetscInt *start_rows,*start_cols; 1803 PetscScalar *start_vals; 1804 /* working stuff for values insertion */ 1805 PetscBT is_primal; 1806 1807 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1808 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1809 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1810 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1811 /* work arrays */ 1812 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1813 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1814 /* nonzeros per row */ 1815 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1816 if (PetscBTLookup(change_basis,i)) { 1817 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1818 if (PetscBTLookup(qr_needed_idx,i)) { 1819 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1820 } else { 1821 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1822 /* get local primal index on the cc */ 1823 j = 0; 1824 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1825 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1826 } 1827 } 1828 } 1829 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1830 ierr = PetscFree(nnz);CHKERRQ(ierr); 1831 /* Set initial identity in the matrix */ 1832 for (i=0;i<pcis->n_B;i++) { 1833 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1834 } 1835 1836 if (pcbddc->dbg_flag) { 1837 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1838 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1839 } 1840 1841 1842 /* Now we loop on the constraints which need a change of basis */ 1843 /* 1844 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1845 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1846 1847 Basic blocks of change of basis matrix T computed 1848 1849 - Using the following block transformation if there is only a primal dof on the cc 1850 (in the example, primal dof is the last one of the edge in LOCAL ordering 1851 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1852 | 1 0 ... 0 1 | 1853 | 0 1 ... 0 1 | 1854 | ... | 1855 | 0 ... 1 1 | 1856 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1857 1858 - via QR decomposition of constraints otherwise 1859 */ 1860 if (qr_needed) { 1861 /* space to store Q */ 1862 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 1863 /* first we issue queries for optimal work */ 1864 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1865 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1867 lqr_work = -1; 1868 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1869 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1870 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1871 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 1872 lgqr_work = -1; 1873 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1874 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1875 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1876 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1877 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1878 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1879 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1880 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1881 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 1882 /* array to store scaling factors for reflectors */ 1883 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 1884 /* array to store rhs and solution of triangular solver */ 1885 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 1886 /* allocating workspace for check */ 1887 if (pcbddc->dbg_flag) { 1888 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 1889 } 1890 } 1891 /* array to store whether a node is primal or not */ 1892 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1893 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 1894 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1895 if (i != total_primal_vertices) { 1896 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1897 } 1898 for (i=0;i<total_primal_vertices;i++) { 1899 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1900 } 1901 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1902 1903 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1904 /* -> using implicit ordering contained in temp_indices data */ 1905 total_counts = pcbddc->n_vertices; 1906 primal_counter = total_counts; 1907 while (total_counts<pcbddc->local_primal_size) { 1908 primal_dofs = 1; 1909 if (PetscBTLookup(change_basis,total_counts)) { 1910 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1911 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1912 primal_dofs++; 1913 } 1914 /* get constraint info */ 1915 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1916 dual_dofs = size_of_constraint-primal_dofs; 1917 1918 if (pcbddc->dbg_flag) { 1919 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1920 } 1921 1922 if (primal_dofs > 1) { /* QR */ 1923 1924 /* copy quadrature constraints for change of basis check */ 1925 if (pcbddc->dbg_flag) { 1926 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1927 } 1928 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1929 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1930 1931 /* compute QR decomposition of constraints */ 1932 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1933 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1934 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1935 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1936 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1937 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1938 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1939 1940 /* explictly compute R^-T */ 1941 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1942 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1943 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1944 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1946 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1947 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1948 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1949 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1950 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1951 1952 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1953 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1954 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1955 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1956 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1957 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1958 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1959 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1960 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1961 1962 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1963 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1964 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1965 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1966 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1967 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1968 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1969 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1970 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1971 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1972 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1973 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1974 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1975 1976 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1977 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1978 /* insert cols for primal dofs */ 1979 for (j=0;j<primal_dofs;j++) { 1980 start_vals = &qr_basis[j*size_of_constraint]; 1981 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1982 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1983 } 1984 /* insert cols for dual dofs */ 1985 for (j=0,k=0;j<dual_dofs;k++) { 1986 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1987 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1988 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1989 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1990 j++; 1991 } 1992 } 1993 1994 /* check change of basis */ 1995 if (pcbddc->dbg_flag) { 1996 PetscInt ii,jj; 1997 PetscBool valid_qr=PETSC_TRUE; 1998 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1999 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2000 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2001 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2002 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2003 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2004 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2005 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2006 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2007 for (jj=0;jj<size_of_constraint;jj++) { 2008 for (ii=0;ii<primal_dofs;ii++) { 2009 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2010 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2011 } 2012 } 2013 if (!valid_qr) { 2014 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2015 for (jj=0;jj<size_of_constraint;jj++) { 2016 for (ii=0;ii<primal_dofs;ii++) { 2017 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2018 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2019 } 2020 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2021 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2022 } 2023 } 2024 } 2025 } else { 2026 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2027 } 2028 } 2029 } else { /* simple transformation block */ 2030 PetscInt row,col; 2031 PetscScalar val; 2032 for (j=0;j<size_of_constraint;j++) { 2033 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2034 if (!PetscBTLookup(is_primal,row)) { 2035 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2036 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2037 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2038 } else { 2039 for (k=0;k<size_of_constraint;k++) { 2040 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2041 if (row != col) { 2042 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2043 } else { 2044 val = 1.0; 2045 } 2046 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2047 } 2048 } 2049 } 2050 } 2051 /* increment primal counter */ 2052 primal_counter += primal_dofs; 2053 } else { 2054 if (pcbddc->dbg_flag) { 2055 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2056 } 2057 } 2058 /* increment constraint counter total_counts */ 2059 total_counts += primal_dofs; 2060 } 2061 2062 /* free workspace */ 2063 if (qr_needed) { 2064 if (pcbddc->dbg_flag) { 2065 ierr = PetscFree(work);CHKERRQ(ierr); 2066 } 2067 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2068 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2069 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2070 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2071 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2072 } 2073 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2074 /* assembling */ 2075 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2076 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2077 /* 2078 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2079 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2080 */ 2081 } 2082 2083 /* get indices in local ordering for vertices and constraints */ 2084 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2085 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2086 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2087 } 2088 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2089 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2090 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2091 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2092 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2093 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2094 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2095 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2096 /* set quantities in PCBDDC data struct */ 2097 pcbddc->n_actual_vertices = i; 2098 /* check if a new primal space has been introduced */ 2099 pcbddc->new_primal_space_local = PETSC_TRUE; 2100 if (olocal_primal_size == pcbddc->local_primal_size) { 2101 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2102 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2103 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2104 } 2105 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2106 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2107 2108 /* flush dbg viewer */ 2109 if (pcbddc->dbg_flag) { 2110 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2111 } 2112 2113 /* free workspace */ 2114 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2115 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2116 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2117 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2118 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2119 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2120 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2121 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2122 PetscFunctionReturn(0); 2123 } 2124 2125 #undef __FUNCT__ 2126 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2127 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2128 { 2129 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2130 PC_IS *pcis = (PC_IS*)pc->data; 2131 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2132 PetscInt bs,ierr,i,vertex_size; 2133 PetscViewer viewer=pcbddc->dbg_viewer; 2134 2135 PetscFunctionBegin; 2136 /* Reset previously computed graph */ 2137 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2138 /* Init local Graph struct */ 2139 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2140 2141 /* Check validity of the csr graph passed in by the user */ 2142 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2143 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2144 } 2145 2146 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2147 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2148 Mat mat_adj; 2149 const PetscInt *xadj,*adjncy; 2150 PetscBool flg_row=PETSC_TRUE; 2151 2152 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2153 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2154 if (!flg_row) { 2155 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2156 } 2157 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2158 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2159 if (!flg_row) { 2160 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2161 } 2162 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2163 } 2164 2165 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2166 vertex_size = 1; 2167 if (!pcbddc->user_provided_isfordofs) { 2168 if (!pcbddc->n_ISForDofs) { 2169 IS *custom_ISForDofs; 2170 2171 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2172 ierr = PetscMalloc1(bs,&custom_ISForDofs);CHKERRQ(ierr); 2173 for (i=0;i<bs;i++) { 2174 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2175 } 2176 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2177 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2178 /* remove my references to IS objects */ 2179 for (i=0;i<bs;i++) { 2180 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2181 } 2182 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2183 } 2184 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2185 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2186 } 2187 2188 /* Setup of Graph */ 2189 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2190 2191 /* Graph's connected components analysis */ 2192 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2193 2194 /* print some info to stdout */ 2195 if (pcbddc->dbg_flag) { 2196 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2197 } 2198 2199 /* mark topography has done */ 2200 pcbddc->recompute_topography = PETSC_FALSE; 2201 PetscFunctionReturn(0); 2202 } 2203 2204 #undef __FUNCT__ 2205 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2206 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2207 { 2208 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2209 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2210 PetscErrorCode ierr; 2211 2212 PetscFunctionBegin; 2213 n = 0; 2214 vertices = 0; 2215 if (pcbddc->ConstraintMatrix) { 2216 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2217 for (i=0;i<local_primal_size;i++) { 2218 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2219 if (size_of_constraint == 1) n++; 2220 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2221 } 2222 if (vertices_idx) { 2223 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2224 n = 0; 2225 for (i=0;i<local_primal_size;i++) { 2226 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2227 if (size_of_constraint == 1) { 2228 vertices[n++]=row_cmat_indices[0]; 2229 } 2230 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2231 } 2232 } 2233 } 2234 *n_vertices = n; 2235 if (vertices_idx) *vertices_idx = vertices; 2236 PetscFunctionReturn(0); 2237 } 2238 2239 #undef __FUNCT__ 2240 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2241 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2242 { 2243 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2244 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2245 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2246 PetscBT touched; 2247 PetscErrorCode ierr; 2248 2249 /* This function assumes that the number of local constraints per connected component 2250 is not greater than the number of nodes defined for the connected component 2251 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2252 PetscFunctionBegin; 2253 n = 0; 2254 constraints_index = 0; 2255 if (pcbddc->ConstraintMatrix) { 2256 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2257 max_size_of_constraint = 0; 2258 for (i=0;i<local_primal_size;i++) { 2259 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2260 if (size_of_constraint > 1) { 2261 n++; 2262 } 2263 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2264 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2265 } 2266 if (constraints_idx) { 2267 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2268 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2269 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2270 n = 0; 2271 for (i=0;i<local_primal_size;i++) { 2272 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2273 if (size_of_constraint > 1) { 2274 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2275 /* find first untouched local node */ 2276 j = 0; 2277 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2278 min_index = row_cmat_global_indices[j]; 2279 min_loc = j; 2280 /* search the minimum among nodes not yet touched on the connected component 2281 since there can be more than one constraint on a single cc */ 2282 for (j=1;j<size_of_constraint;j++) { 2283 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2284 min_index = row_cmat_global_indices[j]; 2285 min_loc = j; 2286 } 2287 } 2288 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2289 constraints_index[n++] = row_cmat_indices[min_loc]; 2290 } 2291 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2292 } 2293 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2294 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2295 } 2296 } 2297 *n_constraints = n; 2298 if (constraints_idx) *constraints_idx = constraints_index; 2299 PetscFunctionReturn(0); 2300 } 2301 2302 /* the next two functions has been adapted from pcis.c */ 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "PCBDDCApplySchur" 2305 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2306 { 2307 PetscErrorCode ierr; 2308 PC_IS *pcis = (PC_IS*)(pc->data); 2309 2310 PetscFunctionBegin; 2311 if (!vec2_B) { vec2_B = v; } 2312 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2313 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2314 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2315 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2316 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2317 PetscFunctionReturn(0); 2318 } 2319 2320 #undef __FUNCT__ 2321 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2322 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2323 { 2324 PetscErrorCode ierr; 2325 PC_IS *pcis = (PC_IS*)(pc->data); 2326 2327 PetscFunctionBegin; 2328 if (!vec2_B) { vec2_B = v; } 2329 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2330 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2331 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2332 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2333 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "PCBDDCSubsetNumbering" 2339 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2340 { 2341 Vec local_vec,global_vec; 2342 IS seqis,paris; 2343 VecScatter scatter_ctx; 2344 PetscScalar *array; 2345 PetscInt *temp_global_dofs; 2346 PetscScalar globalsum; 2347 PetscInt i,j,s; 2348 PetscInt nlocals,first_index,old_index,max_local; 2349 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2350 PetscMPIInt *dof_sizes,*dof_displs; 2351 PetscBool first_found; 2352 PetscErrorCode ierr; 2353 2354 PetscFunctionBegin; 2355 /* mpi buffers */ 2356 MPI_Comm_size(comm,&size_prec_comm); 2357 MPI_Comm_rank(comm,&rank_prec_comm); 2358 j = ( !rank_prec_comm ? size_prec_comm : 0); 2359 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2360 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2361 /* get maximum size of subset */ 2362 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2363 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2364 max_local = 0; 2365 if (n_local_dofs) { 2366 max_local = temp_global_dofs[0]; 2367 for (i=1;i<n_local_dofs;i++) { 2368 if (max_local < temp_global_dofs[i] ) { 2369 max_local = temp_global_dofs[i]; 2370 } 2371 } 2372 } 2373 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2374 max_global++; 2375 max_local = 0; 2376 if (n_local_dofs) { 2377 max_local = local_dofs[0]; 2378 for (i=1;i<n_local_dofs;i++) { 2379 if (max_local < local_dofs[i] ) { 2380 max_local = local_dofs[i]; 2381 } 2382 } 2383 } 2384 max_local++; 2385 /* allocate workspace */ 2386 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2387 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2388 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2389 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2390 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2391 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2392 /* create scatter */ 2393 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2394 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2395 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2396 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2397 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2398 /* init array */ 2399 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2400 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2401 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2402 if (local_dofs_mult) { 2403 for (i=0;i<n_local_dofs;i++) { 2404 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2405 } 2406 } else { 2407 for (i=0;i<n_local_dofs;i++) { 2408 array[local_dofs[i]]=1.0; 2409 } 2410 } 2411 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2412 /* scatter into global vec and get total number of global dofs */ 2413 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2414 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2415 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2416 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2417 /* Fill global_vec with cumulative function for global numbering */ 2418 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2419 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2420 nlocals = 0; 2421 first_index = -1; 2422 first_found = PETSC_FALSE; 2423 for (i=0;i<s;i++) { 2424 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2425 first_found = PETSC_TRUE; 2426 first_index = i; 2427 } 2428 nlocals += (PetscInt)PetscRealPart(array[i]); 2429 } 2430 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2431 if (!rank_prec_comm) { 2432 dof_displs[0]=0; 2433 for (i=1;i<size_prec_comm;i++) { 2434 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2435 } 2436 } 2437 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2438 if (first_found) { 2439 array[first_index] += (PetscScalar)nlocals; 2440 old_index = first_index; 2441 for (i=first_index+1;i<s;i++) { 2442 if (PetscRealPart(array[i]) > 0.0) { 2443 array[i] += array[old_index]; 2444 old_index = i; 2445 } 2446 } 2447 } 2448 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2449 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2450 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2451 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2452 /* get global ordering of local dofs */ 2453 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2454 if (local_dofs_mult) { 2455 for (i=0;i<n_local_dofs;i++) { 2456 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2457 } 2458 } else { 2459 for (i=0;i<n_local_dofs;i++) { 2460 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2461 } 2462 } 2463 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2464 /* free workspace */ 2465 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2466 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2467 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2468 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2469 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2470 /* return pointer to global ordering of local dofs */ 2471 *global_numbering_subset = temp_global_dofs; 2472 PetscFunctionReturn(0); 2473 } 2474 2475 #undef __FUNCT__ 2476 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2477 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2478 { 2479 PetscInt i,j; 2480 PetscScalar *alphas; 2481 PetscErrorCode ierr; 2482 2483 PetscFunctionBegin; 2484 /* this implements stabilized Gram-Schmidt */ 2485 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2486 for (i=0;i<n;i++) { 2487 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2488 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2489 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2490 } 2491 ierr = PetscFree(alphas);CHKERRQ(ierr); 2492 PetscFunctionReturn(0); 2493 } 2494 2495 /* TODO 2496 - now preallocation is done assuming SEQDENSE local matrices 2497 */ 2498 #undef __FUNCT__ 2499 #define __FUNCT__ "MatISGetMPIXAIJ" 2500 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2501 { 2502 Mat new_mat; 2503 Mat_IS *matis = (Mat_IS*)(mat->data); 2504 /* info on mat */ 2505 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2506 PetscInt bs,rows,cols; 2507 PetscInt lrows,lcols; 2508 PetscInt local_rows,local_cols; 2509 PetscBool isdense; 2510 /* values insertion */ 2511 PetscScalar *array; 2512 PetscInt *local_indices,*global_indices; 2513 /* work */ 2514 PetscInt i,j,index_row; 2515 PetscErrorCode ierr; 2516 2517 PetscFunctionBegin; 2518 /* MISSING CHECKS 2519 - rectangular case not covered (it is not allowed by MATIS) 2520 */ 2521 /* get info from mat */ 2522 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2523 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2524 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2525 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2526 2527 /* work */ 2528 ierr = PetscMalloc1(local_rows,&local_indices);CHKERRQ(ierr); 2529 for (i=0;i<local_rows;i++) local_indices[i]=i; 2530 /* map indices of local mat to global values */ 2531 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2532 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2533 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2534 2535 if (reuse==MAT_INITIAL_MATRIX) { 2536 Vec vec_dnz,vec_onz; 2537 PetscScalar *my_dnz,*my_onz; 2538 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2539 PetscInt index_col,owner; 2540 PetscMPIInt nsubdomains; 2541 2542 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2543 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2544 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2545 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2546 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2547 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2548 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2549 2550 /* 2551 preallocation 2552 */ 2553 2554 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2555 /* 2556 Some vectors are needed to sum up properly on shared interface dofs. 2557 Preallocation macros cannot do the job. 2558 Note that preallocation is not exact, since it overestimates nonzeros 2559 */ 2560 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2561 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2562 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2563 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2564 /* All processes need to compute entire row ownership */ 2565 ierr = PetscMalloc1(rows,&row_ownership);CHKERRQ(ierr); 2566 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2567 for (i=0;i<nsubdomains;i++) { 2568 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2569 row_ownership[j]=i; 2570 } 2571 } 2572 2573 /* 2574 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2575 then, they will be summed up properly. This way, preallocation is always sufficient 2576 */ 2577 ierr = PetscMalloc1(local_rows,&my_dnz);CHKERRQ(ierr); 2578 ierr = PetscMalloc1(local_rows,&my_onz);CHKERRQ(ierr); 2579 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2580 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2581 for (i=0;i<local_rows;i++) { 2582 index_row = global_indices[i]; 2583 for (j=i;j<local_rows;j++) { 2584 owner = row_ownership[index_row]; 2585 index_col = global_indices[j]; 2586 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2587 my_dnz[i] += 1.0; 2588 } else { /* offdiag block */ 2589 my_onz[i] += 1.0; 2590 } 2591 /* same as before, interchanging rows and cols */ 2592 if (i != j) { 2593 owner = row_ownership[index_col]; 2594 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2595 my_dnz[j] += 1.0; 2596 } else { 2597 my_onz[j] += 1.0; 2598 } 2599 } 2600 } 2601 } 2602 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2603 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2604 if (local_rows) { /* multilevel guard */ 2605 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2606 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2607 } 2608 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2609 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2610 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2611 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2612 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2613 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2614 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2615 2616 /* set computed preallocation in dnz and onz */ 2617 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2618 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2619 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2620 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2621 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2622 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2623 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2624 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2625 2626 /* Resize preallocation if overestimated */ 2627 for (i=0;i<lrows;i++) { 2628 dnz[i] = PetscMin(dnz[i],lcols); 2629 onz[i] = PetscMin(onz[i],cols-lcols); 2630 } 2631 /* set preallocation */ 2632 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2633 for (i=0;i<lrows/bs;i++) { 2634 dnz[i] = dnz[i*bs]/bs; 2635 onz[i] = onz[i*bs]/bs; 2636 } 2637 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2638 for (i=0;i<lrows/bs;i++) { 2639 dnz[i] = dnz[i]-i; 2640 } 2641 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2642 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2643 *M = new_mat; 2644 } else { 2645 PetscInt mbs,mrows,mcols; 2646 /* some checks */ 2647 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2648 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2649 if (mrows != rows) { 2650 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2651 } 2652 if (mrows != rows) { 2653 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2654 } 2655 if (mbs != bs) { 2656 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2657 } 2658 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2659 } 2660 /* set local to global mappings */ 2661 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2662 /* Set values */ 2663 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2664 if (isdense) { /* special case for dense local matrices */ 2665 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2666 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2667 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2668 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2669 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2670 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2671 } else { /* very basic values insertion for all other matrix types */ 2672 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2673 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2674 for (i=0;i<local_rows;i++) { 2675 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2676 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2677 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2678 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2679 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2680 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2681 } 2682 } 2683 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2684 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2685 if (isdense) { 2686 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2687 } 2688 PetscFunctionReturn(0); 2689 } 2690 2691 #undef __FUNCT__ 2692 #define __FUNCT__ "MatISSubassemble_Private" 2693 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2694 { 2695 Mat subdomain_adj; 2696 IS new_ranks,ranks_send_to; 2697 MatPartitioning partitioner; 2698 Mat_IS *matis; 2699 PetscInt n_neighs,*neighs,*n_shared,**shared; 2700 PetscInt prank; 2701 PetscMPIInt size,rank,color; 2702 PetscInt *xadj,*adjncy,*oldranks; 2703 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2704 PetscInt i,j,n_subdomains,local_size,threshold=0; 2705 PetscErrorCode ierr; 2706 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2707 PetscSubcomm subcomm; 2708 2709 PetscFunctionBegin; 2710 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2711 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2712 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2713 2714 /* Get info on mapping */ 2715 matis = (Mat_IS*)(mat->data); 2716 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2717 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2718 2719 /* build local CSR graph of subdomains' connectivity */ 2720 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2721 xadj[0] = 0; 2722 xadj[1] = PetscMax(n_neighs-1,0); 2723 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2724 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2725 2726 if (threshold) { 2727 PetscInt* count,min_threshold; 2728 ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr); 2729 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2730 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2731 for (j=0;j<n_shared[i];j++) { 2732 count[shared[i][j]] += 1; 2733 } 2734 } 2735 /* adapt threshold since we dont want to lose significant connections */ 2736 min_threshold = n_neighs; 2737 for (i=1;i<n_neighs;i++) { 2738 for (j=0;j<n_shared[i];j++) { 2739 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2740 } 2741 } 2742 threshold = PetscMax(min_threshold+1,threshold); 2743 xadj[1] = 0; 2744 for (i=1;i<n_neighs;i++) { 2745 for (j=0;j<n_shared[i];j++) { 2746 if (count[shared[i][j]] < threshold) { 2747 adjncy[xadj[1]] = neighs[i]; 2748 adjncy_wgt[xadj[1]] = n_shared[i]; 2749 xadj[1]++; 2750 break; 2751 } 2752 } 2753 } 2754 ierr = PetscFree(count);CHKERRQ(ierr); 2755 } else { 2756 if (xadj[1]) { 2757 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2758 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2759 } 2760 } 2761 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2762 if (use_square) { 2763 for (i=0;i<xadj[1];i++) { 2764 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2765 } 2766 } 2767 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2768 2769 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2770 2771 /* 2772 Restrict work on active processes only. 2773 */ 2774 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2775 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2776 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2777 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2778 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2779 if (color) { 2780 ierr = PetscFree(xadj);CHKERRQ(ierr); 2781 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2782 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2783 } else { 2784 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2785 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2786 prank = rank; 2787 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2788 for (i=0;i<size;i++) { 2789 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2790 } 2791 for (i=0;i<xadj[1];i++) { 2792 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2793 } 2794 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2795 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2796 n_subdomains = (PetscInt)size; 2797 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2798 2799 /* Partition */ 2800 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2801 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2802 if (use_vwgt) { 2803 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2804 v_wgt[0] = local_size; 2805 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2806 } 2807 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2808 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2809 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2810 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2811 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2812 2813 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2814 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2815 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2816 /* clean up */ 2817 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2818 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2819 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2820 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2821 } 2822 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2823 2824 /* assemble parallel IS for sends */ 2825 i = 1; 2826 if (color) i=0; 2827 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2828 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2829 2830 /* get back IS */ 2831 *is_sends = ranks_send_to; 2832 PetscFunctionReturn(0); 2833 } 2834 2835 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2836 2837 #undef __FUNCT__ 2838 #define __FUNCT__ "MatISSubassemble" 2839 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2840 { 2841 Mat local_mat,new_mat; 2842 Mat_IS *matis; 2843 IS is_sends_internal; 2844 PetscInt rows,cols; 2845 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2846 PetscBool ismatis,isdense; 2847 ISLocalToGlobalMapping l2gmap; 2848 PetscInt* l2gmap_indices; 2849 const PetscInt* is_indices; 2850 MatType new_local_type; 2851 MatTypePrivate new_local_type_private; 2852 /* buffers */ 2853 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2854 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2855 /* MPI */ 2856 MPI_Comm comm; 2857 PetscMPIInt n_sends,n_recvs,commsize; 2858 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2859 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2860 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2861 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2862 PetscErrorCode ierr; 2863 2864 PetscFunctionBegin; 2865 /* checks */ 2866 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2867 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2868 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2869 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2870 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2871 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2872 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2873 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2874 PetscValidLogicalCollectiveInt(mat,bs,0); 2875 /* prepare IS for sending if not provided */ 2876 if (!is_sends) { 2877 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2878 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2879 } else { 2880 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2881 is_sends_internal = is_sends; 2882 } 2883 2884 /* get pointer of MATIS data */ 2885 matis = (Mat_IS*)mat->data; 2886 2887 /* get comm */ 2888 comm = PetscObjectComm((PetscObject)mat); 2889 2890 /* compute number of sends */ 2891 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2892 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2893 2894 /* compute number of receives */ 2895 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2896 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 2897 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2898 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2899 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2900 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2901 ierr = PetscFree(iflags);CHKERRQ(ierr); 2902 2903 /* prepare send/receive buffers */ 2904 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 2905 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2906 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 2907 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2908 2909 /* Get data from local mat */ 2910 if (!isdense) { 2911 /* TODO: See below some guidelines on how to prepare the local buffers */ 2912 /* 2913 send_buffer_vals should contain the raw values of the local matrix 2914 send_buffer_idxs should contain: 2915 - MatType_PRIVATE type 2916 - PetscInt size_of_l2gmap 2917 - PetscInt global_row_indices[size_of_l2gmap] 2918 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2919 */ 2920 } else { 2921 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2922 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2923 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 2924 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2925 send_buffer_idxs[1] = i; 2926 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2927 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2928 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2929 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2930 for (i=0;i<n_sends;i++) { 2931 ilengths_vals[is_indices[i]] = len*len; 2932 ilengths_idxs[is_indices[i]] = len+2; 2933 } 2934 } 2935 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2936 buf_size_idxs = 0; 2937 buf_size_vals = 0; 2938 for (i=0;i<n_recvs;i++) { 2939 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2940 buf_size_vals += (PetscInt)olengths_vals[i]; 2941 } 2942 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 2943 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 2944 2945 /* get new tags for clean communications */ 2946 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2947 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2948 2949 /* allocate for requests */ 2950 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 2951 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 2952 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 2953 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 2954 2955 /* communications */ 2956 ptr_idxs = recv_buffer_idxs; 2957 ptr_vals = recv_buffer_vals; 2958 for (i=0;i<n_recvs;i++) { 2959 source_dest = onodes[i]; 2960 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2961 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2962 ptr_idxs += olengths_idxs[i]; 2963 ptr_vals += olengths_vals[i]; 2964 } 2965 for (i=0;i<n_sends;i++) { 2966 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2967 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2968 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2969 } 2970 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2971 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2972 2973 /* assemble new l2g map */ 2974 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2975 ptr_idxs = recv_buffer_idxs; 2976 buf_size_idxs = 0; 2977 for (i=0;i<n_recvs;i++) { 2978 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2979 ptr_idxs += olengths_idxs[i]; 2980 } 2981 ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr); 2982 ptr_idxs = recv_buffer_idxs; 2983 buf_size_idxs = 0; 2984 for (i=0;i<n_recvs;i++) { 2985 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2986 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2987 ptr_idxs += olengths_idxs[i]; 2988 } 2989 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2990 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2991 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2992 2993 /* infer new local matrix type from received local matrices type */ 2994 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2995 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2996 new_local_type_private = MATAIJ_PRIVATE; 2997 new_local_type = MATSEQAIJ; 2998 if (n_recvs) { 2999 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3000 ptr_idxs = recv_buffer_idxs; 3001 for (i=0;i<n_recvs;i++) { 3002 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3003 new_local_type_private = MATAIJ_PRIVATE; 3004 break; 3005 } 3006 ptr_idxs += olengths_idxs[i]; 3007 } 3008 switch (new_local_type_private) { 3009 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 3010 new_local_type = MATSEQAIJ; 3011 bs = 1; 3012 break; 3013 case MATAIJ_PRIVATE: 3014 new_local_type = MATSEQAIJ; 3015 bs = 1; 3016 break; 3017 case MATBAIJ_PRIVATE: 3018 new_local_type = MATSEQBAIJ; 3019 break; 3020 case MATSBAIJ_PRIVATE: 3021 new_local_type = MATSEQSBAIJ; 3022 break; 3023 default: 3024 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3025 break; 3026 } 3027 } 3028 3029 /* create MATIS object */ 3030 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3031 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 3032 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3033 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 3034 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3035 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3036 3037 /* set values */ 3038 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3039 ptr_vals = recv_buffer_vals; 3040 ptr_idxs = recv_buffer_idxs; 3041 for (i=0;i<n_recvs;i++) { 3042 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3043 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3044 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3045 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3046 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3047 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3048 } 3049 ptr_idxs += olengths_idxs[i]; 3050 ptr_vals += olengths_vals[i]; 3051 } 3052 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3053 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3054 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3055 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3056 3057 { /* check */ 3058 Vec lvec,rvec; 3059 PetscReal infty_error; 3060 3061 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3062 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3063 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3064 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3065 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 3066 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3067 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3068 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3069 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3070 } 3071 3072 /* free workspace */ 3073 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3074 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3075 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3076 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3077 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3078 if (isdense) { 3079 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3080 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3081 } else { 3082 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3083 } 3084 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3085 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3086 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3087 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3088 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3089 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3090 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3091 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3092 ierr = PetscFree(onodes);CHKERRQ(ierr); 3093 /* get back new mat */ 3094 *mat_n = new_mat; 3095 PetscFunctionReturn(0); 3096 } 3097 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3100 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3101 { 3102 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3103 PC_IS *pcis = (PC_IS*)pc->data; 3104 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3105 MatNullSpace CoarseNullSpace=NULL; 3106 ISLocalToGlobalMapping coarse_islg; 3107 IS coarse_is; 3108 PetscInt max_it; 3109 PetscInt im_active=-1,active_procs=-1; 3110 PC pc_temp; 3111 PCType coarse_pc_type; 3112 KSPType coarse_ksp_type; 3113 PetscBool multilevel_requested,multilevel_allowed; 3114 PetscBool setsym,issym,isbddc,isnn,coarse_reuse; 3115 PetscErrorCode ierr; 3116 3117 PetscFunctionBegin; 3118 /* Assign global numbering to coarse dofs */ 3119 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3120 PetscInt ocoarse_size; 3121 ocoarse_size = pcbddc->coarse_size; 3122 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3123 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3124 /* see if we can avoid some work */ 3125 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3126 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3127 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3128 coarse_reuse = PETSC_FALSE; 3129 } else { /* we can safely reuse already computed coarse matrix */ 3130 coarse_reuse = PETSC_TRUE; 3131 } 3132 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3133 coarse_reuse = PETSC_FALSE; 3134 } 3135 } else { /* primal space has not been changed, so we can reuse coarse matrix */ 3136 coarse_reuse = PETSC_TRUE; 3137 } 3138 3139 /* infer some info from user */ 3140 issym = PETSC_FALSE; 3141 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3142 multilevel_allowed = PETSC_FALSE; 3143 multilevel_requested = PETSC_FALSE; 3144 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3145 if (multilevel_requested) { 3146 /* count "active processes" */ 3147 im_active = !!(pcis->n); 3148 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3149 if (active_procs/pcbddc->coarsening_ratio < 2) { 3150 multilevel_allowed = PETSC_FALSE; 3151 } else { 3152 multilevel_allowed = PETSC_TRUE; 3153 } 3154 } 3155 3156 /* set defaults for coarse KSP and PC */ 3157 if (multilevel_allowed) { 3158 if (issym) { 3159 coarse_ksp_type = KSPRICHARDSON; 3160 } else { 3161 coarse_ksp_type = KSPCHEBYSHEV; 3162 } 3163 coarse_pc_type = PCBDDC; 3164 } else { 3165 coarse_ksp_type = KSPPREONLY; 3166 coarse_pc_type = PCREDUNDANT; 3167 } 3168 3169 /* create the coarse KSP object only once with defaults */ 3170 if (!pcbddc->coarse_ksp) { 3171 char prefix[256],str_level[3]; 3172 size_t len; 3173 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3174 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3175 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3176 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3177 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3178 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3179 /* prefix */ 3180 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3181 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3182 if (!pcbddc->current_level) { 3183 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3184 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3185 } else { 3186 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3187 if (pcbddc->current_level>1) len -= 2; 3188 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3189 *(prefix+len)='\0'; 3190 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3191 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3192 } 3193 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3194 } 3195 /* allow user customization */ 3196 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3197 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3198 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3199 3200 /* get some info after set from options */ 3201 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3202 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3203 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3204 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3205 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3206 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3207 isbddc = PETSC_FALSE; 3208 } 3209 3210 /* propagate BDDC info to the next level */ 3211 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3212 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3213 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3214 3215 /* creates temporary MATIS object for coarse matrix */ 3216 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3217 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3218 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3219 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3220 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3221 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3222 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3223 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3224 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3225 3226 /* assemble coarse matrix */ 3227 if (isbddc || isnn) { 3228 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3229 } else { 3230 if (coarse_reuse) { 3231 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3232 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3233 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3234 } else { 3235 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3236 } 3237 } 3238 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3239 3240 /* create local to global scatters for coarse problem */ 3241 if (pcbddc->new_primal_space) { 3242 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3243 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3244 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3245 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3246 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3247 } 3248 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3249 3250 /* propagate symmetry info to coarse matrix */ 3251 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3252 3253 /* Compute coarse null space (special handling by BDDC only) */ 3254 if (pcbddc->NullSpace) { 3255 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3256 if (isbddc) { 3257 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3258 } else { 3259 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3260 } 3261 } 3262 3263 /* set operators */ 3264 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3265 3266 /* additional KSP customization */ 3267 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3268 if (max_it < 5) { 3269 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3270 } 3271 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3272 3273 3274 /* print some info if requested */ 3275 if (pcbddc->dbg_flag) { 3276 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3277 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3278 if (!multilevel_allowed) { 3279 if (multilevel_requested) { 3280 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3281 } else if (pcbddc->max_levels) { 3282 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3283 } 3284 } 3285 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3286 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3287 } 3288 3289 /* setup coarse ksp */ 3290 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3291 if (pcbddc->dbg_flag) { 3292 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3293 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3294 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3295 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3296 } 3297 3298 /* Check coarse problem if requested */ 3299 if (pcbddc->dbg_flag) { 3300 KSP check_ksp; 3301 KSPType check_ksp_type; 3302 PC check_pc; 3303 Vec check_vec; 3304 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3305 PetscInt its; 3306 PetscBool ispreonly,compute; 3307 3308 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3309 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3310 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3311 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3312 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3313 if (ispreonly) { 3314 check_ksp_type = KSPPREONLY; 3315 compute = PETSC_FALSE; 3316 } else { 3317 if (issym) check_ksp_type = KSPCG; 3318 else check_ksp_type = KSPGMRES; 3319 compute = PETSC_TRUE; 3320 } 3321 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3322 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3323 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3324 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3325 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3326 /* create random vec */ 3327 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3328 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3329 if (CoarseNullSpace) { 3330 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3331 } 3332 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3333 /* solve coarse problem */ 3334 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3335 if (CoarseNullSpace) { 3336 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3337 } 3338 /* check coarse problem residual error */ 3339 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3340 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3341 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3342 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3343 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3344 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3345 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3346 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3347 /* get eigenvalue estimation if preonly has not been requested */ 3348 if (!ispreonly) { 3349 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3350 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3351 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3352 } 3353 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3354 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3355 } 3356 /* free memory */ 3357 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3358 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3359 PetscFunctionReturn(0); 3360 } 3361 3362 #undef __FUNCT__ 3363 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3364 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3365 { 3366 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3367 PC_IS* pcis = (PC_IS*)pc->data; 3368 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3369 PetscInt i,coarse_size; 3370 PetscInt *local_primal_indices; 3371 PetscErrorCode ierr; 3372 3373 PetscFunctionBegin; 3374 /* Compute global number of coarse dofs */ 3375 if (!pcbddc->primal_indices_local_idxs) { 3376 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 3377 } 3378 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3379 3380 /* check numbering */ 3381 if (pcbddc->dbg_flag) { 3382 PetscScalar coarsesum,*array; 3383 3384 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3385 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3386 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3387 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3388 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3389 for (i=0;i<pcbddc->local_primal_size;i++) { 3390 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3391 } 3392 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3393 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3394 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3395 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3396 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3397 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3398 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3399 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3400 for (i=0;i<pcis->n;i++) { 3401 if (array[i] == 1.0) { 3402 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3403 } 3404 } 3405 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3406 for (i=0;i<pcis->n;i++) { 3407 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3408 } 3409 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3410 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3411 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3412 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3413 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3414 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3415 if (pcbddc->dbg_flag > 1) { 3416 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3417 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3418 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3419 for (i=0;i<pcbddc->local_primal_size;i++) { 3420 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3421 } 3422 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3423 } 3424 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3425 } 3426 /* get back data */ 3427 *coarse_size_n = coarse_size; 3428 *local_primal_indices_n = local_primal_indices; 3429 PetscFunctionReturn(0); 3430 } 3431 3432