1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscInt i; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 54 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 55 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 for (i=0;i<pcbddc->n_ISForDofs;i++) { 59 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 60 } 61 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 62 pcbddc->n_ISForDofs = 0; 63 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 64 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 65 PetscFunctionReturn(0); 66 } 67 68 #undef __FUNCT__ 69 #define __FUNCT__ "PCBDDCResetTopography" 70 PetscErrorCode PCBDDCResetTopography(PC pc) 71 { 72 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 77 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 78 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "PCBDDCResetSolvers" 84 PetscErrorCode PCBDDCResetSolvers(PC pc) 85 { 86 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 87 PetscErrorCode ierr; 88 89 PetscFunctionBegin; 90 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 91 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 97 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 99 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 111 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 118 { 119 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 120 PC_IS *pcis = (PC_IS*)pc->data; 121 VecType impVecType; 122 PetscInt n_constraints,n_R,old_size; 123 PetscErrorCode ierr; 124 125 PetscFunctionBegin; 126 if (!pcbddc->ConstraintMatrix) { 127 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 128 } 129 /* get sizes */ 130 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 131 n_R = pcis->n-pcbddc->n_actual_vertices; 132 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 133 /* local work vectors (try to avoid unneeded work)*/ 134 /* R nodes */ 135 old_size = -1; 136 if (pcbddc->vec1_R) { 137 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 138 } 139 if (n_R != old_size) { 140 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 141 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 145 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 146 } 147 /* local primal dofs */ 148 old_size = -1; 149 if (pcbddc->vec1_P) { 150 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 151 } 152 if (pcbddc->local_primal_size != old_size) { 153 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 156 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 157 } 158 /* local explicit constraints */ 159 old_size = -1; 160 if (pcbddc->vec1_C) { 161 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 162 } 163 if (n_constraints && n_constraints != old_size) { 164 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 167 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCBDDCSetUpCorrection" 174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 175 { 176 PetscErrorCode ierr; 177 /* pointers to pcis and pcbddc */ 178 PC_IS* pcis = (PC_IS*)pc->data; 179 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 180 /* submatrices of local problem */ 181 Mat A_RV,A_VR,A_VV; 182 /* working matrices */ 183 Mat M1,M2,M3,C_CR; 184 /* working vectors */ 185 Vec vec1_C,vec2_C,vec1_V,vec2_V; 186 /* additional working stuff */ 187 IS is_aux; 188 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 189 const PetscScalar *array,*row_cmat_values; 190 const PetscInt *row_cmat_indices,*idx_R_local; 191 PetscInt *idx_V_B,*auxindices; 192 PetscInt n_vertices,n_constraints,size_of_constraint; 193 PetscInt i,j,n_R,n_D,n_B; 194 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 195 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 196 MatType impMatType; 197 /* some shortcuts to scalars */ 198 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 199 /* for debugging purposes */ 200 PetscReal *coarsefunctions_errors,*constraints_errors; 201 202 PetscFunctionBegin; 203 /* get number of vertices (corners plus constraints with change of basis) 204 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 205 n_vertices = pcbddc->n_actual_vertices; 206 n_constraints = pcbddc->local_primal_size-n_vertices; 207 /* Set Non-overlapping dimensions */ 208 n_B = pcis->n_B; n_D = pcis->n - n_B; 209 n_R = pcis->n-n_vertices; 210 211 /* Set types for local objects needed by BDDC precondtioner */ 212 impMatType = MATSEQDENSE; 213 214 /* Allocating some extra storage just to be safe */ 215 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 216 for (i=0;i<pcis->n;i++) auxindices[i]=i; 217 218 /* vertices in boundary numbering */ 219 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 220 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 221 if (i != n_vertices) { 222 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 223 } 224 225 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 226 if (n_constraints) { 227 /* see if we can save some allocations */ 228 if (pcbddc->local_auxmat2) { 229 PetscInt on_R,on_constraints; 230 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 231 if (on_R != n_R || on_constraints != n_constraints) { 232 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 233 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 234 } 235 } 236 /* work vectors */ 237 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 239 /* auxiliary matrices */ 240 if (!pcbddc->local_auxmat2) { 241 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 242 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 243 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 244 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 245 } 246 247 /* Extract constraints on R nodes: C_{CR} */ 248 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 249 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 250 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 251 252 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 253 for (i=0;i<n_constraints;i++) { 254 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 255 /* Get row of constraint matrix in R numbering */ 256 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 257 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 258 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 259 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 260 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 261 /* Solve for row of constraint matrix in R numbering */ 262 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 263 /* Set values in local_auxmat2 */ 264 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 265 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 266 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 267 } 268 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 271 272 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 273 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 274 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 275 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 276 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 277 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 278 ierr = MatSetUp(M1);CHKERRQ(ierr); 279 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 280 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 281 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 282 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 283 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 284 ierr = MatDestroy(&M2);CHKERRQ(ierr); 285 ierr = MatDestroy(&M3);CHKERRQ(ierr); 286 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 287 if (!pcbddc->local_auxmat1) { 288 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 289 } else { 290 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 291 } 292 } 293 294 /* Get submatrices from subdomain matrix */ 295 if (n_vertices) { 296 PetscInt ibs,mbs; 297 PetscBool issbaij; 298 Mat newmat; 299 300 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 301 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 302 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 303 if (ibs != mbs) { /* need to convert to SEQAIJ */ 304 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 308 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 309 } else { 310 /* this is safe */ 311 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 313 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 314 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 315 /* which of the two approaches is faster? */ 316 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 317 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 318 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 319 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 320 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 321 } else { 322 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 323 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 324 } 325 } 326 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 327 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 328 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 329 } 330 331 /* Matrix of coarse basis functions (local) */ 332 if (pcbddc->coarse_phi_B) { 333 PetscInt on_B,on_primal; 334 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 335 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 336 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 337 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 338 } 339 } 340 if (pcbddc->coarse_phi_D) { 341 PetscInt on_D,on_primal; 342 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 343 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 344 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 345 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 346 } 347 } 348 if (!pcbddc->coarse_phi_B) { 349 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 350 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 351 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 352 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 353 } 354 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 355 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 356 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 357 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 358 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 359 } 360 361 if (pcbddc->dbg_flag) { 362 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 363 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 364 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 365 } 366 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 367 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 368 369 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 370 371 /* vertices */ 372 for (i=0;i<n_vertices;i++) { 373 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 374 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 375 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 376 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 377 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 378 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 379 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 380 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 381 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 383 if (n_constraints) { 384 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 385 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 387 } 388 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 389 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 390 391 /* Set values in coarse basis function and subdomain part of coarse_mat */ 392 /* coarse basis functions */ 393 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 394 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 397 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 398 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 400 if (pcbddc->switch_static || pcbddc->dbg_flag) { 401 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 404 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 405 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 406 } 407 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 408 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 409 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 if (n_constraints) { 412 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 } 416 417 /* check */ 418 if (pcbddc->dbg_flag) { 419 /* assemble subdomain vector on local nodes */ 420 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 421 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 422 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 426 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 427 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 428 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 429 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 430 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 432 if (n_constraints) { 433 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 436 } 437 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 440 /* check saddle point solution */ 441 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 444 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 445 /* shift by the identity matrix */ 446 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 447 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 450 } 451 } 452 453 /* constraints */ 454 for (i=0;i<n_constraints;i++) { 455 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 456 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 457 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 458 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 459 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 460 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 461 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 462 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 463 if (n_vertices) { 464 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 465 } 466 /* Set values in coarse basis function and subdomain part of coarse_mat */ 467 /* coarse basis functions */ 468 j = i+n_vertices; /* don't touch this! */ 469 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 470 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 473 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 474 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 475 if (pcbddc->switch_static || pcbddc->dbg_flag) { 476 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 479 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 480 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 481 } 482 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 483 if (n_vertices) { 484 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 485 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 487 } 488 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 489 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 491 492 if (pcbddc->dbg_flag) { 493 /* assemble subdomain vector on nodes */ 494 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 495 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 496 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 498 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 499 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 500 /* assemble subdomain vector of lagrange multipliers */ 501 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 502 if (n_vertices) { 503 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 504 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 505 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 506 } 507 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 508 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 510 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 513 /* check saddle point solution */ 514 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 515 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 517 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 518 /* shift by the identity matrix */ 519 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 520 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 523 } 524 } 525 /* call assembling routines for local coarse basis */ 526 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 if (pcbddc->switch_static || pcbddc->dbg_flag) { 529 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 } 532 533 /* compute other basis functions for non-symmetric problems */ 534 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 535 if (!setsym || (setsym && !issym)) { 536 if (!pcbddc->coarse_psi_B) { 537 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 538 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 539 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 540 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 541 } 542 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 543 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 544 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 545 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 546 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 547 } 548 for (i=0;i<pcbddc->local_primal_size;i++) { 549 if (n_constraints) { 550 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 551 for (j=0;j<n_constraints;j++) { 552 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 553 } 554 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 555 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 556 } 557 if (i<n_vertices) { 558 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 559 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 560 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 561 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 562 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 563 if (n_constraints) { 564 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 565 } 566 } else { 567 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 568 } 569 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 570 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 571 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 574 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 575 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 576 if (i<n_vertices) { 577 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 578 } 579 if (pcbddc->switch_static || pcbddc->dbg_flag) { 580 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 581 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 583 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 584 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 585 } 586 587 if (pcbddc->dbg_flag) { 588 /* assemble subdomain vector on nodes */ 589 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 590 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 591 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 592 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 593 if (i<n_vertices) { 594 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 595 } 596 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 597 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 598 /* assemble subdomain vector of lagrange multipliers */ 599 for (j=0;j<pcbddc->local_primal_size;j++) { 600 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 601 } 602 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 603 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 604 /* check saddle point solution */ 605 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 606 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 608 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 609 /* shift by the identity matrix */ 610 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 611 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 612 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 614 } 615 } 616 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 if (pcbddc->switch_static || pcbddc->dbg_flag) { 619 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 } 622 } 623 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 624 /* Checking coarse_sub_mat and coarse basis functios */ 625 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 626 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 627 if (pcbddc->dbg_flag) { 628 Mat coarse_sub_mat; 629 Mat AUXMAT,TM1,TM2,TM3,TM4; 630 Mat coarse_phi_D,coarse_phi_B; 631 Mat coarse_psi_D,coarse_psi_B; 632 Mat A_II,A_BB,A_IB,A_BI; 633 MatType checkmattype=MATSEQAIJ; 634 PetscReal real_value; 635 636 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 637 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 638 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 639 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 640 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 641 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 642 if (pcbddc->coarse_psi_B) { 643 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 644 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 645 } 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 647 648 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 649 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 650 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 651 if (pcbddc->coarse_psi_B) { 652 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 653 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 654 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 655 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 656 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 657 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 658 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 659 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 660 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 661 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 662 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 663 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 664 } else { 665 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 666 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 667 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 668 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 669 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 670 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 671 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 672 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 673 } 674 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 675 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 676 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 677 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 678 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 679 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 680 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 683 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 684 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 685 for (i=0;i<pcbddc->local_primal_size;i++) { 686 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 687 } 688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 689 for (i=0;i<pcbddc->local_primal_size;i++) { 690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 691 } 692 if (pcbddc->coarse_psi_B) { 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 694 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 696 } 697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 698 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 700 } 701 } 702 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 703 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 704 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 705 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 706 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 707 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 708 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 709 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 710 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 711 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 712 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 713 if (pcbddc->coarse_psi_B) { 714 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 715 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 716 } 717 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 718 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 719 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 720 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 721 } 722 /* free memory */ 723 if (n_vertices) { 724 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 725 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 726 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 727 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 728 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 729 } 730 if (n_constraints) { 731 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 732 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 733 ierr = MatDestroy(&M1);CHKERRQ(ierr); 734 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 735 } 736 ierr = PetscFree(auxindices);CHKERRQ(ierr); 737 /* get back data */ 738 *coarse_submat_vals_n = coarse_submat_vals; 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 744 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 745 { 746 PC_IS* pcis = (PC_IS*)(pc->data); 747 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 748 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 749 PetscBool issbaij,isbaij; 750 /* manage repeated solves */ 751 MatReuse reuse; 752 MatStructure matstruct; 753 PetscErrorCode ierr; 754 755 PetscFunctionBegin; 756 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 757 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 758 } 759 /* get mat flags */ 760 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 761 reuse = MAT_INITIAL_MATRIX; 762 if (pc->setupcalled) { 763 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 764 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 765 if (matstruct == SAME_NONZERO_PATTERN) { 766 reuse = MAT_REUSE_MATRIX; 767 } else { 768 reuse = MAT_INITIAL_MATRIX; 769 } 770 } 771 if (reuse == MAT_INITIAL_MATRIX) { 772 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 773 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 774 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 775 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 776 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 777 } 778 779 /* transform local matrices if needed */ 780 if (pcbddc->use_change_of_basis) { 781 Mat change_mat_all; 782 PetscScalar *row_cmat_values; 783 PetscInt *row_cmat_indices; 784 PetscInt *nnz,*is_indices,*temp_indices; 785 PetscInt i,j,k,n_D,n_B; 786 787 /* Get Non-overlapping dimensions */ 788 n_B = pcis->n_B; 789 n_D = pcis->n-n_B; 790 791 /* compute nonzero structure of change of basis on all local nodes */ 792 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 793 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 794 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 795 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 796 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 797 k=1; 798 for (i=0;i<n_B;i++) { 799 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 800 nnz[is_indices[i]]=j; 801 if (k < j) k = j; 802 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 803 } 804 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 805 /* assemble change of basis matrix on the whole set of local dofs */ 806 ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr); 807 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 808 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 809 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 810 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 811 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 812 for (i=0;i<n_D;i++) { 813 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 814 } 815 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 816 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 817 for (i=0;i<n_B;i++) { 818 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 819 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 820 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 821 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 822 } 823 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 824 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 825 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 826 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 827 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 828 if (!issbaij && !isbaij) { 829 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 830 } else { 831 Mat work_mat; 832 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 833 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 834 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 835 } 836 /* 837 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 838 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 839 */ 840 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 841 ierr = PetscFree(nnz);CHKERRQ(ierr); 842 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 843 } else { 844 /* without change of basis, the local matrix is unchanged */ 845 if (!pcbddc->local_mat) { 846 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 847 pcbddc->local_mat = matis->A; 848 } 849 } 850 851 /* get submatrices */ 852 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 853 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 854 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 855 if (!issbaij) { 856 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 857 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 858 } else { 859 Mat newmat; 860 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 861 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 862 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 863 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 864 } 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 870 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 871 { 872 PC_IS* pcis = (PC_IS*)(pc->data); 873 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 874 IS is_aux1,is_aux2; 875 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 876 PetscInt n_vertices,i,j,n_R,n_D,n_B; 877 PetscInt vbs,bs; 878 PetscBT bitmask; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 /* No need to setup local scatters if primal space is unchanged */ 883 if (!pcbddc->new_primal_space_local) { 884 PetscFunctionReturn(0); 885 } 886 /* destroy old objects */ 887 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 888 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 889 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 890 /* Set Non-overlapping dimensions */ 891 n_B = pcis->n_B; n_D = pcis->n - n_B; 892 n_vertices = pcbddc->n_actual_vertices; 893 /* create auxiliary bitmask */ 894 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 895 for (i=0;i<n_vertices;i++) { 896 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 897 } 898 899 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 900 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 901 for (i=0, n_R=0; i<pcis->n; i++) { 902 if (!PetscBTLookup(bitmask,i)) { 903 idx_R_local[n_R] = i; 904 n_R++; 905 } 906 } 907 908 /* Block code */ 909 vbs = 1; 910 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 911 if (bs>1 && !(n_vertices%bs)) { 912 PetscBool is_blocked = PETSC_TRUE; 913 PetscInt *vary; 914 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 915 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 916 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 917 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 918 for (i=0; i<n_vertices; i++) { 919 if (vary[i]!=0 && vary[i]!=bs) { 920 is_blocked = PETSC_FALSE; 921 break; 922 } 923 } 924 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 925 vbs = bs; 926 for (i=0;i<n_R/vbs;i++) { 927 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 928 } 929 } 930 ierr = PetscFree(vary);CHKERRQ(ierr); 931 } 932 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 933 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 934 935 /* print some info if requested */ 936 if (pcbddc->dbg_flag) { 937 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 938 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 939 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 940 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 941 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 942 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 943 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 944 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 945 } 946 947 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 948 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 949 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 950 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 951 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 952 for (i=0; i<n_D; i++) { 953 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 954 } 955 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 956 for (i=0, j=0; i<n_R; i++) { 957 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 958 aux_array1[j++] = i; 959 } 960 } 961 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 962 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 963 for (i=0, j=0; i<n_B; i++) { 964 if (!PetscBTLookup(bitmask,is_indices[i])) { 965 aux_array2[j++] = i; 966 } 967 } 968 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 969 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 970 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 971 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 972 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 973 974 if (pcbddc->switch_static || pcbddc->dbg_flag) { 975 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 976 for (i=0, j=0; i<n_R; i++) { 977 if (PetscBTLookup(bitmask,idx_R_local[i])) { 978 aux_array1[j++] = i; 979 } 980 } 981 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 982 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 983 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 984 } 985 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 986 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 993 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 994 { 995 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 996 PC_IS *pcis = (PC_IS*)pc->data; 997 PC pc_temp; 998 Mat A_RR; 999 MatStructure matstruct; 1000 MatReuse reuse; 1001 PetscScalar m_one = -1.0; 1002 PetscReal value; 1003 PetscInt n_D,n_R,ibs,mbs; 1004 PetscBool use_exact,use_exact_reduced,issbaij; 1005 PetscErrorCode ierr; 1006 /* prefixes stuff */ 1007 char dir_prefix[256],neu_prefix[256],str_level[3]; 1008 size_t len; 1009 1010 PetscFunctionBegin; 1011 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1012 1013 /* compute prefixes */ 1014 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1015 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1016 if (!pcbddc->current_level) { 1017 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1018 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1019 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1020 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1021 } else { 1022 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1023 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 1024 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1025 len -= 15; /* remove "pc_bddc_coarse_" */ 1026 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 1027 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1028 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1029 *(dir_prefix+len)='\0'; 1030 *(neu_prefix+len)='\0'; 1031 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1032 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1033 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1034 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1035 } 1036 1037 /* DIRICHLET PROBLEM */ 1038 /* Matrix for Dirichlet problem is pcis->A_II */ 1039 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1040 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1041 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1042 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1043 /* default */ 1044 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1045 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1046 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1047 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1048 if (issbaij) { 1049 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1050 } else { 1051 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1052 } 1053 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1054 } 1055 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1056 /* Allow user's customization */ 1057 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1058 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1059 if (!n_D) { 1060 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1061 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1062 } 1063 /* Set Up KSP for Dirichlet problem of BDDC */ 1064 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1065 /* set ksp_D into pcis data */ 1066 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1067 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1068 pcis->ksp_D = pcbddc->ksp_D; 1069 1070 /* NEUMANN PROBLEM */ 1071 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1072 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1073 if (pcbddc->ksp_R) { /* already created ksp */ 1074 PetscInt nn_R; 1075 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1076 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1077 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1078 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1079 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1080 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1081 reuse = MAT_INITIAL_MATRIX; 1082 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1083 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1084 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1085 reuse = MAT_INITIAL_MATRIX; 1086 } else { /* safe to reuse the matrix */ 1087 reuse = MAT_REUSE_MATRIX; 1088 } 1089 } 1090 /* last check */ 1091 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1092 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1093 reuse = MAT_INITIAL_MATRIX; 1094 } 1095 } else { /* first time, so we need to create the matrix */ 1096 reuse = MAT_INITIAL_MATRIX; 1097 } 1098 /* extract A_RR */ 1099 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1100 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1101 if (ibs != mbs) { 1102 Mat newmat; 1103 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1104 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1105 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1106 } else { 1107 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1108 } 1109 if (!pcbddc->ksp_R) { /* create object if not present */ 1110 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1111 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1112 /* default */ 1113 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1114 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1115 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1116 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1117 if (issbaij) { 1118 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1119 } else { 1120 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1121 } 1122 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1123 } 1124 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1125 /* Allow user's customization */ 1126 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1127 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1128 if (!n_R) { 1129 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1130 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1131 } 1132 /* Set Up KSP for Neumann problem of BDDC */ 1133 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1134 1135 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1136 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1137 /* Dirichlet */ 1138 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1139 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1140 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1141 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1142 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1143 /* need to be adapted? */ 1144 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1145 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1146 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1147 /* print info */ 1148 if (pcbddc->dbg_flag) { 1149 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1150 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1151 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1152 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1153 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1154 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1155 } 1156 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1157 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1158 } 1159 1160 /* Neumann */ 1161 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1162 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1163 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1164 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1165 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1166 /* need to be adapted? */ 1167 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1168 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1169 /* print info */ 1170 if (pcbddc->dbg_flag) { 1171 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1172 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1173 } 1174 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1175 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1176 } 1177 } 1178 /* free Neumann problem's matrix */ 1179 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1185 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1186 { 1187 PetscErrorCode ierr; 1188 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1189 1190 PetscFunctionBegin; 1191 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1192 if (pcbddc->local_auxmat1) { 1193 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1194 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1195 } 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1201 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1202 { 1203 PetscErrorCode ierr; 1204 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1205 PC_IS* pcis = (PC_IS*) (pc->data); 1206 const PetscScalar zero = 0.0; 1207 1208 PetscFunctionBegin; 1209 /* Application of PHI^T (or PSI^T) */ 1210 if (pcbddc->coarse_psi_B) { 1211 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1212 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1213 } else { 1214 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1215 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1216 } 1217 /* Scatter data of coarse_rhs */ 1218 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1219 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1220 1221 /* Local solution on R nodes */ 1222 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1223 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 if (pcbddc->switch_static) { 1226 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1227 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1228 } 1229 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1230 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1231 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1232 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 if (pcbddc->switch_static) { 1234 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1235 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1236 } 1237 1238 /* Coarse solution */ 1239 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1240 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1241 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1242 } 1243 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1244 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1245 1246 /* Sum contributions from two levels */ 1247 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1248 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1254 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1255 { 1256 PetscErrorCode ierr; 1257 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1258 1259 PetscFunctionBegin; 1260 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #undef __FUNCT__ 1265 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1266 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1267 { 1268 PetscErrorCode ierr; 1269 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1270 1271 PetscFunctionBegin; 1272 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 /* uncomment for testing purposes */ 1277 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1280 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1281 { 1282 PetscErrorCode ierr; 1283 PC_IS* pcis = (PC_IS*)(pc->data); 1284 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1285 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1286 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1287 MatType impMatType=MATSEQAIJ; 1288 /* one and zero */ 1289 PetscScalar one=1.0,zero=0.0; 1290 /* space to store constraints and their local indices */ 1291 PetscScalar *temp_quadrature_constraint; 1292 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1293 /* iterators */ 1294 PetscInt i,j,k,total_counts,temp_start_ptr; 1295 /* stuff to store connected components stored in pcbddc->mat_graph */ 1296 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1297 PetscInt n_ISForFaces,n_ISForEdges; 1298 /* near null space stuff */ 1299 MatNullSpace nearnullsp; 1300 const Vec *nearnullvecs; 1301 Vec *localnearnullsp; 1302 PetscBool nnsp_has_cnst; 1303 PetscInt nnsp_size; 1304 PetscScalar *array; 1305 /* BLAS integers */ 1306 PetscBLASInt lwork,lierr; 1307 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1308 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1309 /* LAPACK working arrays for SVD or POD */ 1310 PetscBool skip_lapack; 1311 PetscScalar *work; 1312 PetscReal *singular_vals; 1313 #if defined(PETSC_USE_COMPLEX) 1314 PetscReal *rwork; 1315 #endif 1316 #if defined(PETSC_MISSING_LAPACK_GESVD) 1317 PetscBLASInt Blas_one_2=1; 1318 PetscScalar *temp_basis,*correlation_mat; 1319 #else 1320 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1321 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1322 #endif 1323 /* reuse */ 1324 PetscInt olocal_primal_size; 1325 PetscInt *oprimal_indices_local_idxs; 1326 /* change of basis */ 1327 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1328 PetscBool boolforchange,qr_needed; 1329 PetscBT touched,change_basis,qr_needed_idx; 1330 /* auxiliary stuff */ 1331 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1332 /* some quantities */ 1333 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1334 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1335 1336 1337 PetscFunctionBegin; 1338 /* Destroy Mat objects computed previously */ 1339 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1340 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1341 /* Get index sets for faces, edges and vertices from graph */ 1342 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1343 pcbddc->use_vertices = PETSC_TRUE; 1344 } 1345 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1346 /* HACK: provide functions to set change of basis */ 1347 if (!ISForVertices && pcbddc->NullSpace) { 1348 pcbddc->use_change_of_basis = PETSC_TRUE; 1349 pcbddc->use_change_on_faces = PETSC_FALSE; 1350 } 1351 /* print some info */ 1352 if (pcbddc->dbg_flag) { 1353 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1354 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1355 i = 0; 1356 if (ISForVertices) { 1357 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1358 } 1359 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1360 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1362 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1363 } 1364 /* check if near null space is attached to global mat */ 1365 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1366 if (nearnullsp) { 1367 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1368 /* remove any stored info */ 1369 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1370 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1371 /* store information for BDDC solver reuse */ 1372 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1373 pcbddc->onearnullspace = nearnullsp; 1374 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1375 for (i=0;i<nnsp_size;i++) { 1376 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1377 } 1378 } else { /* if near null space is not provided BDDC uses constants by default */ 1379 nnsp_size = 0; 1380 nnsp_has_cnst = PETSC_TRUE; 1381 } 1382 /* get max number of constraints on a single cc */ 1383 max_constraints = nnsp_size; 1384 if (nnsp_has_cnst) max_constraints++; 1385 1386 /* 1387 Evaluate maximum storage size needed by the procedure 1388 - temp_indices will contain start index of each constraint stored as follows 1389 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1390 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1391 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1392 */ 1393 total_counts = n_ISForFaces+n_ISForEdges; 1394 total_counts *= max_constraints; 1395 n_vertices = 0; 1396 if (ISForVertices) { 1397 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1398 } 1399 total_counts += n_vertices; 1400 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1401 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1402 total_counts = 0; 1403 max_size_of_constraint = 0; 1404 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1405 if (i<n_ISForEdges) { 1406 used_IS = &ISForEdges[i]; 1407 } else { 1408 used_IS = &ISForFaces[i-n_ISForEdges]; 1409 } 1410 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1411 total_counts += j; 1412 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1413 } 1414 total_counts *= max_constraints; 1415 total_counts += n_vertices; 1416 ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr); 1417 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr); 1418 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1419 /* get local part of global near null space vectors */ 1420 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1421 for (k=0;k<nnsp_size;k++) { 1422 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1423 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1424 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1425 } 1426 1427 /* whether or not to skip lapack calls */ 1428 skip_lapack = PETSC_TRUE; 1429 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1430 1431 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1432 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1433 PetscScalar temp_work; 1434 #if defined(PETSC_MISSING_LAPACK_GESVD) 1435 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1436 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1437 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1438 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1439 #if defined(PETSC_USE_COMPLEX) 1440 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1441 #endif 1442 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1443 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1444 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1445 lwork = -1; 1446 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1447 #if !defined(PETSC_USE_COMPLEX) 1448 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1449 #else 1450 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1451 #endif 1452 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1453 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1454 #else /* on missing GESVD */ 1455 /* SVD */ 1456 PetscInt max_n,min_n; 1457 max_n = max_size_of_constraint; 1458 min_n = max_constraints; 1459 if (max_size_of_constraint < max_constraints) { 1460 min_n = max_size_of_constraint; 1461 max_n = max_constraints; 1462 } 1463 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1464 #if defined(PETSC_USE_COMPLEX) 1465 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1466 #endif 1467 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1468 lwork = -1; 1469 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1470 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1471 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1472 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1473 #if !defined(PETSC_USE_COMPLEX) 1474 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1475 #else 1476 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1477 #endif 1478 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1479 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1480 #endif /* on missing GESVD */ 1481 /* Allocate optimal workspace */ 1482 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1483 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1484 } 1485 /* Now we can loop on constraining sets */ 1486 total_counts = 0; 1487 temp_indices[0] = 0; 1488 /* vertices */ 1489 if (ISForVertices) { 1490 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1491 if (nnsp_has_cnst) { /* consider all vertices */ 1492 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1493 for (i=0;i<n_vertices;i++) { 1494 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1495 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1496 total_counts++; 1497 } 1498 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1499 PetscBool used_vertex; 1500 for (i=0;i<n_vertices;i++) { 1501 used_vertex = PETSC_FALSE; 1502 k = 0; 1503 while (!used_vertex && k<nnsp_size) { 1504 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1505 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1506 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1507 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1508 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1509 total_counts++; 1510 used_vertex = PETSC_TRUE; 1511 } 1512 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1513 k++; 1514 } 1515 } 1516 } 1517 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1518 n_vertices = total_counts; 1519 } 1520 1521 /* edges and faces */ 1522 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1523 if (i<n_ISForEdges) { 1524 used_IS = &ISForEdges[i]; 1525 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1526 } else { 1527 used_IS = &ISForFaces[i-n_ISForEdges]; 1528 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1529 } 1530 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1531 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1532 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1533 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1534 /* change of basis should not be performed on local periodic nodes */ 1535 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1536 if (nnsp_has_cnst) { 1537 PetscScalar quad_value; 1538 temp_constraints++; 1539 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1540 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1541 for (j=0;j<size_of_constraint;j++) { 1542 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1543 } 1544 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1545 total_counts++; 1546 } 1547 for (k=0;k<nnsp_size;k++) { 1548 PetscReal real_value; 1549 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1550 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1551 for (j=0;j<size_of_constraint;j++) { 1552 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1553 } 1554 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1555 /* check if array is null on the connected component */ 1556 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1557 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1558 if (real_value > 0.0) { /* keep indices and values */ 1559 temp_constraints++; 1560 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1561 total_counts++; 1562 } 1563 } 1564 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1565 valid_constraints = temp_constraints; 1566 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1567 if (!pcbddc->use_nnsp_true && temp_constraints) { 1568 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1569 1570 #if defined(PETSC_MISSING_LAPACK_GESVD) 1571 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1572 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1573 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1574 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1575 from that computed using LAPACKgesvd 1576 -> This is due to a different computation of eigenvectors in LAPACKheev 1577 -> The quality of the POD-computed basis will be the same */ 1578 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1579 /* Store upper triangular part of correlation matrix */ 1580 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1581 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1582 for (j=0;j<temp_constraints;j++) { 1583 for (k=0;k<j+1;k++) { 1584 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1585 } 1586 } 1587 /* compute eigenvalues and eigenvectors of correlation matrix */ 1588 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1589 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1590 #if !defined(PETSC_USE_COMPLEX) 1591 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1592 #else 1593 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1594 #endif 1595 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1596 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1597 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1598 j=0; 1599 while (j < temp_constraints && singular_vals[j] < tol) j++; 1600 total_counts=total_counts-j; 1601 valid_constraints = temp_constraints-j; 1602 /* scale and copy POD basis into used quadrature memory */ 1603 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1604 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1605 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1606 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1607 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1608 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1609 if (j<temp_constraints) { 1610 PetscInt ii; 1611 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1612 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1613 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1614 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1615 for (k=0;k<temp_constraints-j;k++) { 1616 for (ii=0;ii<size_of_constraint;ii++) { 1617 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1618 } 1619 } 1620 } 1621 #else /* on missing GESVD */ 1622 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1623 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1624 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1625 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1626 #if !defined(PETSC_USE_COMPLEX) 1627 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1628 #else 1629 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1630 #endif 1631 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1632 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1633 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1634 k = temp_constraints; 1635 if (k > size_of_constraint) k = size_of_constraint; 1636 j = 0; 1637 while (j < k && singular_vals[k-j-1] < tol) j++; 1638 total_counts = total_counts-temp_constraints+k-j; 1639 valid_constraints = k-j; 1640 #endif /* on missing GESVD */ 1641 } 1642 /* setting change_of_basis flag is safe now */ 1643 if (boolforchange) { 1644 for (j=0;j<valid_constraints;j++) { 1645 PetscBTSet(change_basis,total_counts-j-1); 1646 } 1647 } 1648 } 1649 /* free index sets of faces, edges and vertices */ 1650 for (i=0;i<n_ISForFaces;i++) { 1651 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1652 } 1653 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1654 for (i=0;i<n_ISForEdges;i++) { 1655 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1656 } 1657 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1658 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1659 /* map temp_indices_to_constraint in boundary numbering */ 1660 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1661 if (i != temp_indices[total_counts]) { 1662 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1663 } 1664 1665 /* free workspace */ 1666 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1667 ierr = PetscFree(work);CHKERRQ(ierr); 1668 #if defined(PETSC_USE_COMPLEX) 1669 ierr = PetscFree(rwork);CHKERRQ(ierr); 1670 #endif 1671 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1672 #if defined(PETSC_MISSING_LAPACK_GESVD) 1673 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1674 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1675 #endif 1676 } 1677 for (k=0;k<nnsp_size;k++) { 1678 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1679 } 1680 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1681 1682 /* set quantities in pcbddc data structure and store previous primal size */ 1683 /* n_vertices defines the number of subdomain corners in the primal space */ 1684 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1685 olocal_primal_size = pcbddc->local_primal_size; 1686 pcbddc->local_primal_size = total_counts; 1687 pcbddc->n_vertices = n_vertices; 1688 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1689 1690 /* Create constraint matrix */ 1691 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1692 /* so we need to set it up properly either with or without change of basis */ 1693 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1694 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1695 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1696 /* array to compute a local numbering of constraints : vertices first then constraints */ 1697 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1698 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1699 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1700 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1701 /* auxiliary stuff for basis change */ 1702 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1703 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1704 1705 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1706 total_primal_vertices=0; 1707 for (i=0;i<pcbddc->local_primal_size;i++) { 1708 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1709 if (size_of_constraint == 1) { 1710 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1711 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1712 aux_primal_minloc[total_primal_vertices]=0; 1713 total_primal_vertices++; 1714 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1715 PetscInt min_loc,min_index; 1716 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1717 /* find first untouched local node */ 1718 k = 0; 1719 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1720 min_index = global_indices[k]; 1721 min_loc = k; 1722 /* search the minimum among global nodes already untouched on the cc */ 1723 for (k=1;k<size_of_constraint;k++) { 1724 /* there can be more than one constraint on a single connected component */ 1725 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1726 min_index = global_indices[k]; 1727 min_loc = k; 1728 } 1729 } 1730 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1731 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1732 aux_primal_minloc[total_primal_vertices]=min_loc; 1733 total_primal_vertices++; 1734 } 1735 } 1736 /* determine if a QR strategy is needed for change of basis */ 1737 qr_needed = PETSC_FALSE; 1738 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1739 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1740 if (PetscBTLookup(change_basis,i)) { 1741 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1742 j = 0; 1743 for (k=0;k<size_of_constraint;k++) { 1744 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1745 j++; 1746 } 1747 } 1748 /* found more than one primal dof on the cc */ 1749 if (j > 1) { 1750 PetscBTSet(qr_needed_idx,i); 1751 qr_needed = PETSC_TRUE; 1752 } 1753 } 1754 } 1755 /* free workspace */ 1756 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1757 1758 /* permute indices in order to have a sorted set of vertices */ 1759 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1760 1761 /* nonzero structure of constraint matrix */ 1762 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1763 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1764 j=total_primal_vertices; 1765 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1766 if (!PetscBTLookup(change_basis,i)) { 1767 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1768 j++; 1769 } 1770 } 1771 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1772 ierr = PetscFree(nnz);CHKERRQ(ierr); 1773 /* set values in constraint matrix */ 1774 for (i=0;i<total_primal_vertices;i++) { 1775 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1776 } 1777 total_counts = total_primal_vertices; 1778 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1779 if (!PetscBTLookup(change_basis,i)) { 1780 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1781 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1782 total_counts++; 1783 } 1784 } 1785 /* assembling */ 1786 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1787 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1788 /* 1789 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1790 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1791 */ 1792 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1793 if (pcbddc->use_change_of_basis) { 1794 /* dual and primal dofs on a single cc */ 1795 PetscInt dual_dofs,primal_dofs; 1796 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1797 PetscInt primal_counter; 1798 /* working stuff for GEQRF */ 1799 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1800 PetscBLASInt lqr_work; 1801 /* working stuff for UNGQR */ 1802 PetscScalar *gqr_work,lgqr_work_t; 1803 PetscBLASInt lgqr_work; 1804 /* working stuff for TRTRS */ 1805 PetscScalar *trs_rhs; 1806 PetscBLASInt Blas_NRHS; 1807 /* pointers for values insertion into change of basis matrix */ 1808 PetscInt *start_rows,*start_cols; 1809 PetscScalar *start_vals; 1810 /* working stuff for values insertion */ 1811 PetscBT is_primal; 1812 1813 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1814 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1815 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1816 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1817 /* work arrays */ 1818 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1819 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1820 /* nonzeros per row */ 1821 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1822 if (PetscBTLookup(change_basis,i)) { 1823 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1824 if (PetscBTLookup(qr_needed_idx,i)) { 1825 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1826 } else { 1827 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1828 /* get local primal index on the cc */ 1829 j = 0; 1830 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1831 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1832 } 1833 } 1834 } 1835 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1836 ierr = PetscFree(nnz);CHKERRQ(ierr); 1837 /* Set initial identity in the matrix */ 1838 for (i=0;i<pcis->n_B;i++) { 1839 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1840 } 1841 1842 if (pcbddc->dbg_flag) { 1843 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1844 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1845 } 1846 1847 1848 /* Now we loop on the constraints which need a change of basis */ 1849 /* 1850 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1851 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1852 1853 Basic blocks of change of basis matrix T computed 1854 1855 - Using the following block transformation if there is only a primal dof on the cc 1856 (in the example, primal dof is the last one of the edge in LOCAL ordering 1857 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1858 | 1 0 ... 0 1 | 1859 | 0 1 ... 0 1 | 1860 | ... | 1861 | 0 ... 1 1 | 1862 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1863 1864 - via QR decomposition of constraints otherwise 1865 */ 1866 if (qr_needed) { 1867 /* space to store Q */ 1868 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 1869 /* first we issue queries for optimal work */ 1870 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1871 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1872 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1873 lqr_work = -1; 1874 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1875 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1876 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1877 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 1878 lgqr_work = -1; 1879 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1880 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1881 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1882 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1883 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1884 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1885 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1886 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1887 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 1888 /* array to store scaling factors for reflectors */ 1889 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 1890 /* array to store rhs and solution of triangular solver */ 1891 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 1892 /* allocating workspace for check */ 1893 if (pcbddc->dbg_flag) { 1894 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 1895 } 1896 } 1897 /* array to store whether a node is primal or not */ 1898 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1899 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 1900 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1901 if (i != total_primal_vertices) { 1902 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1903 } 1904 for (i=0;i<total_primal_vertices;i++) { 1905 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1906 } 1907 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1908 1909 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1910 /* -> using implicit ordering contained in temp_indices data */ 1911 total_counts = pcbddc->n_vertices; 1912 primal_counter = total_counts; 1913 while (total_counts<pcbddc->local_primal_size) { 1914 primal_dofs = 1; 1915 if (PetscBTLookup(change_basis,total_counts)) { 1916 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1917 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1918 primal_dofs++; 1919 } 1920 /* get constraint info */ 1921 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1922 dual_dofs = size_of_constraint-primal_dofs; 1923 1924 if (pcbddc->dbg_flag) { 1925 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1926 } 1927 1928 if (primal_dofs > 1) { /* QR */ 1929 1930 /* copy quadrature constraints for change of basis check */ 1931 if (pcbddc->dbg_flag) { 1932 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1933 } 1934 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1935 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1936 1937 /* compute QR decomposition of constraints */ 1938 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1939 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1940 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1941 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1942 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1943 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1944 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1945 1946 /* explictly compute R^-T */ 1947 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1948 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1949 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1950 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1951 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1952 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1953 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1954 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1955 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1956 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1957 1958 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1959 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1960 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1961 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1962 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1963 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1964 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1965 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1966 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1967 1968 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1969 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1970 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1971 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1972 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1973 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1974 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1975 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1977 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1978 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1979 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1980 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1981 1982 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1983 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1984 /* insert cols for primal dofs */ 1985 for (j=0;j<primal_dofs;j++) { 1986 start_vals = &qr_basis[j*size_of_constraint]; 1987 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1988 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1989 } 1990 /* insert cols for dual dofs */ 1991 for (j=0,k=0;j<dual_dofs;k++) { 1992 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1993 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1994 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1995 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1996 j++; 1997 } 1998 } 1999 2000 /* check change of basis */ 2001 if (pcbddc->dbg_flag) { 2002 PetscInt ii,jj; 2003 PetscBool valid_qr=PETSC_TRUE; 2004 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2005 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2006 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2007 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2008 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2009 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2010 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2011 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2012 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2013 for (jj=0;jj<size_of_constraint;jj++) { 2014 for (ii=0;ii<primal_dofs;ii++) { 2015 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2016 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2017 } 2018 } 2019 if (!valid_qr) { 2020 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2021 for (jj=0;jj<size_of_constraint;jj++) { 2022 for (ii=0;ii<primal_dofs;ii++) { 2023 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2024 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2025 } 2026 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2027 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2028 } 2029 } 2030 } 2031 } else { 2032 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2033 } 2034 } 2035 } else { /* simple transformation block */ 2036 PetscInt row,col; 2037 PetscScalar val; 2038 for (j=0;j<size_of_constraint;j++) { 2039 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2040 if (!PetscBTLookup(is_primal,row)) { 2041 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2042 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2043 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2044 } else { 2045 for (k=0;k<size_of_constraint;k++) { 2046 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2047 if (row != col) { 2048 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2049 } else { 2050 val = 1.0; 2051 } 2052 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2053 } 2054 } 2055 } 2056 } 2057 /* increment primal counter */ 2058 primal_counter += primal_dofs; 2059 } else { 2060 if (pcbddc->dbg_flag) { 2061 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2062 } 2063 } 2064 /* increment constraint counter total_counts */ 2065 total_counts += primal_dofs; 2066 } 2067 2068 /* free workspace */ 2069 if (qr_needed) { 2070 if (pcbddc->dbg_flag) { 2071 ierr = PetscFree(work);CHKERRQ(ierr); 2072 } 2073 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2074 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2075 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2076 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2077 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2078 } 2079 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2080 /* assembling */ 2081 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2082 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2083 /* 2084 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2085 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2086 */ 2087 } 2088 2089 /* get indices in local ordering for vertices and constraints */ 2090 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2091 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2092 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2093 } 2094 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2095 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2096 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2097 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2098 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2099 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2100 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2101 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2102 /* set quantities in PCBDDC data struct */ 2103 pcbddc->n_actual_vertices = i; 2104 /* check if a new primal space has been introduced */ 2105 pcbddc->new_primal_space_local = PETSC_TRUE; 2106 if (olocal_primal_size == pcbddc->local_primal_size) { 2107 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2108 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2109 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2110 } 2111 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2112 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2113 2114 /* flush dbg viewer */ 2115 if (pcbddc->dbg_flag) { 2116 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2117 } 2118 2119 /* free workspace */ 2120 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2121 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2122 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2123 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2124 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2125 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2126 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2127 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2128 PetscFunctionReturn(0); 2129 } 2130 2131 #undef __FUNCT__ 2132 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2133 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2134 { 2135 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2136 PC_IS *pcis = (PC_IS*)pc->data; 2137 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2138 PetscInt bs,ierr,i,vertex_size; 2139 PetscViewer viewer=pcbddc->dbg_viewer; 2140 2141 PetscFunctionBegin; 2142 /* Reset previously computed graph */ 2143 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2144 /* Init local Graph struct */ 2145 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2146 2147 /* Check validity of the csr graph passed in by the user */ 2148 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2149 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2150 } 2151 2152 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2153 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2154 Mat mat_adj; 2155 const PetscInt *xadj,*adjncy; 2156 PetscBool flg_row=PETSC_TRUE; 2157 2158 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2159 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2160 if (!flg_row) { 2161 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2162 } 2163 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2164 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2165 if (!flg_row) { 2166 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2167 } 2168 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2169 } 2170 2171 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2172 vertex_size = 1; 2173 if (!pcbddc->user_provided_isfordofs) { 2174 if (!pcbddc->n_ISForDofs) { 2175 IS *custom_ISForDofs; 2176 2177 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2178 ierr = PetscMalloc1(bs,&custom_ISForDofs);CHKERRQ(ierr); 2179 for (i=0;i<bs;i++) { 2180 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2181 } 2182 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2183 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2184 /* remove my references to IS objects */ 2185 for (i=0;i<bs;i++) { 2186 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2187 } 2188 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2189 } 2190 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2191 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2192 } 2193 2194 /* Setup of Graph */ 2195 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2196 2197 /* Graph's connected components analysis */ 2198 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2199 2200 /* print some info to stdout */ 2201 if (pcbddc->dbg_flag) { 2202 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2203 } 2204 2205 /* mark topography has done */ 2206 pcbddc->recompute_topography = PETSC_FALSE; 2207 PetscFunctionReturn(0); 2208 } 2209 2210 #undef __FUNCT__ 2211 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2212 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2213 { 2214 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2215 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2216 PetscErrorCode ierr; 2217 2218 PetscFunctionBegin; 2219 n = 0; 2220 vertices = 0; 2221 if (pcbddc->ConstraintMatrix) { 2222 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2223 for (i=0;i<local_primal_size;i++) { 2224 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2225 if (size_of_constraint == 1) n++; 2226 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2227 } 2228 if (vertices_idx) { 2229 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2230 n = 0; 2231 for (i=0;i<local_primal_size;i++) { 2232 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2233 if (size_of_constraint == 1) { 2234 vertices[n++]=row_cmat_indices[0]; 2235 } 2236 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2237 } 2238 } 2239 } 2240 *n_vertices = n; 2241 if (vertices_idx) *vertices_idx = vertices; 2242 PetscFunctionReturn(0); 2243 } 2244 2245 #undef __FUNCT__ 2246 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2247 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2248 { 2249 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2250 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2251 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2252 PetscBT touched; 2253 PetscErrorCode ierr; 2254 2255 /* This function assumes that the number of local constraints per connected component 2256 is not greater than the number of nodes defined for the connected component 2257 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2258 PetscFunctionBegin; 2259 n = 0; 2260 constraints_index = 0; 2261 if (pcbddc->ConstraintMatrix) { 2262 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2263 max_size_of_constraint = 0; 2264 for (i=0;i<local_primal_size;i++) { 2265 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2266 if (size_of_constraint > 1) { 2267 n++; 2268 } 2269 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2270 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2271 } 2272 if (constraints_idx) { 2273 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2274 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2275 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2276 n = 0; 2277 for (i=0;i<local_primal_size;i++) { 2278 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2279 if (size_of_constraint > 1) { 2280 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2281 /* find first untouched local node */ 2282 j = 0; 2283 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2284 min_index = row_cmat_global_indices[j]; 2285 min_loc = j; 2286 /* search the minimum among nodes not yet touched on the connected component 2287 since there can be more than one constraint on a single cc */ 2288 for (j=1;j<size_of_constraint;j++) { 2289 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2290 min_index = row_cmat_global_indices[j]; 2291 min_loc = j; 2292 } 2293 } 2294 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2295 constraints_index[n++] = row_cmat_indices[min_loc]; 2296 } 2297 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2298 } 2299 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2300 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2301 } 2302 } 2303 *n_constraints = n; 2304 if (constraints_idx) *constraints_idx = constraints_index; 2305 PetscFunctionReturn(0); 2306 } 2307 2308 /* the next two functions has been adapted from pcis.c */ 2309 #undef __FUNCT__ 2310 #define __FUNCT__ "PCBDDCApplySchur" 2311 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2312 { 2313 PetscErrorCode ierr; 2314 PC_IS *pcis = (PC_IS*)(pc->data); 2315 2316 PetscFunctionBegin; 2317 if (!vec2_B) { vec2_B = v; } 2318 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2319 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2320 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2321 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2322 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2323 PetscFunctionReturn(0); 2324 } 2325 2326 #undef __FUNCT__ 2327 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2328 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2329 { 2330 PetscErrorCode ierr; 2331 PC_IS *pcis = (PC_IS*)(pc->data); 2332 2333 PetscFunctionBegin; 2334 if (!vec2_B) { vec2_B = v; } 2335 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2336 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2337 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2338 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2339 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2340 PetscFunctionReturn(0); 2341 } 2342 2343 #undef __FUNCT__ 2344 #define __FUNCT__ "PCBDDCSubsetNumbering" 2345 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2346 { 2347 Vec local_vec,global_vec; 2348 IS seqis,paris; 2349 VecScatter scatter_ctx; 2350 PetscScalar *array; 2351 PetscInt *temp_global_dofs; 2352 PetscScalar globalsum; 2353 PetscInt i,j,s; 2354 PetscInt nlocals,first_index,old_index,max_local; 2355 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2356 PetscMPIInt *dof_sizes,*dof_displs; 2357 PetscBool first_found; 2358 PetscErrorCode ierr; 2359 2360 PetscFunctionBegin; 2361 /* mpi buffers */ 2362 MPI_Comm_size(comm,&size_prec_comm); 2363 MPI_Comm_rank(comm,&rank_prec_comm); 2364 j = ( !rank_prec_comm ? size_prec_comm : 0); 2365 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2366 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2367 /* get maximum size of subset */ 2368 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2369 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2370 max_local = 0; 2371 if (n_local_dofs) { 2372 max_local = temp_global_dofs[0]; 2373 for (i=1;i<n_local_dofs;i++) { 2374 if (max_local < temp_global_dofs[i] ) { 2375 max_local = temp_global_dofs[i]; 2376 } 2377 } 2378 } 2379 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2380 max_global++; 2381 max_local = 0; 2382 if (n_local_dofs) { 2383 max_local = local_dofs[0]; 2384 for (i=1;i<n_local_dofs;i++) { 2385 if (max_local < local_dofs[i] ) { 2386 max_local = local_dofs[i]; 2387 } 2388 } 2389 } 2390 max_local++; 2391 /* allocate workspace */ 2392 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2393 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2394 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2395 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2396 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2397 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2398 /* create scatter */ 2399 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2400 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2401 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2402 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2403 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2404 /* init array */ 2405 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2406 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2407 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2408 if (local_dofs_mult) { 2409 for (i=0;i<n_local_dofs;i++) { 2410 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2411 } 2412 } else { 2413 for (i=0;i<n_local_dofs;i++) { 2414 array[local_dofs[i]]=1.0; 2415 } 2416 } 2417 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2418 /* scatter into global vec and get total number of global dofs */ 2419 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2420 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2421 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2422 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2423 /* Fill global_vec with cumulative function for global numbering */ 2424 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2425 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2426 nlocals = 0; 2427 first_index = -1; 2428 first_found = PETSC_FALSE; 2429 for (i=0;i<s;i++) { 2430 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2431 first_found = PETSC_TRUE; 2432 first_index = i; 2433 } 2434 nlocals += (PetscInt)PetscRealPart(array[i]); 2435 } 2436 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2437 if (!rank_prec_comm) { 2438 dof_displs[0]=0; 2439 for (i=1;i<size_prec_comm;i++) { 2440 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2441 } 2442 } 2443 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2444 if (first_found) { 2445 array[first_index] += (PetscScalar)nlocals; 2446 old_index = first_index; 2447 for (i=first_index+1;i<s;i++) { 2448 if (PetscRealPart(array[i]) > 0.0) { 2449 array[i] += array[old_index]; 2450 old_index = i; 2451 } 2452 } 2453 } 2454 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2455 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2456 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2457 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2458 /* get global ordering of local dofs */ 2459 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2460 if (local_dofs_mult) { 2461 for (i=0;i<n_local_dofs;i++) { 2462 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2463 } 2464 } else { 2465 for (i=0;i<n_local_dofs;i++) { 2466 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2467 } 2468 } 2469 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2470 /* free workspace */ 2471 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2472 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2473 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2474 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2475 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2476 /* return pointer to global ordering of local dofs */ 2477 *global_numbering_subset = temp_global_dofs; 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2483 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2484 { 2485 PetscInt i,j; 2486 PetscScalar *alphas; 2487 PetscErrorCode ierr; 2488 2489 PetscFunctionBegin; 2490 /* this implements stabilized Gram-Schmidt */ 2491 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2492 for (i=0;i<n;i++) { 2493 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2494 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2495 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2496 } 2497 ierr = PetscFree(alphas);CHKERRQ(ierr); 2498 PetscFunctionReturn(0); 2499 } 2500 2501 /* TODO 2502 - now preallocation is done assuming SEQDENSE local matrices 2503 */ 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatISGetMPIXAIJ" 2506 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2507 { 2508 Mat new_mat; 2509 Mat_IS *matis = (Mat_IS*)(mat->data); 2510 /* info on mat */ 2511 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2512 PetscInt bs,rows,cols; 2513 PetscInt lrows,lcols; 2514 PetscInt local_rows,local_cols; 2515 PetscBool isdense; 2516 /* values insertion */ 2517 PetscScalar *array; 2518 PetscInt *local_indices,*global_indices; 2519 /* work */ 2520 PetscInt i,j,index_row; 2521 PetscErrorCode ierr; 2522 2523 PetscFunctionBegin; 2524 /* MISSING CHECKS 2525 - rectangular case not covered (it is not allowed by MATIS) 2526 */ 2527 /* get info from mat */ 2528 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2529 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2530 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2531 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2532 2533 /* work */ 2534 ierr = PetscMalloc1(local_rows,&local_indices);CHKERRQ(ierr); 2535 for (i=0;i<local_rows;i++) local_indices[i]=i; 2536 /* map indices of local mat to global values */ 2537 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2538 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2539 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2540 2541 if (reuse==MAT_INITIAL_MATRIX) { 2542 Vec vec_dnz,vec_onz; 2543 PetscScalar *my_dnz,*my_onz; 2544 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2545 PetscInt index_col,owner; 2546 PetscMPIInt nsubdomains; 2547 2548 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2549 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2550 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2551 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2552 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2553 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2554 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2555 2556 /* 2557 preallocation 2558 */ 2559 2560 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2561 /* 2562 Some vectors are needed to sum up properly on shared interface dofs. 2563 Preallocation macros cannot do the job. 2564 Note that preallocation is not exact, since it overestimates nonzeros 2565 */ 2566 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2567 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2568 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2569 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2570 /* All processes need to compute entire row ownership */ 2571 ierr = PetscMalloc1(rows,&row_ownership);CHKERRQ(ierr); 2572 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2573 for (i=0;i<nsubdomains;i++) { 2574 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2575 row_ownership[j]=i; 2576 } 2577 } 2578 2579 /* 2580 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2581 then, they will be summed up properly. This way, preallocation is always sufficient 2582 */ 2583 ierr = PetscMalloc1(local_rows,&my_dnz);CHKERRQ(ierr); 2584 ierr = PetscMalloc1(local_rows,&my_onz);CHKERRQ(ierr); 2585 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2586 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2587 for (i=0;i<local_rows;i++) { 2588 index_row = global_indices[i]; 2589 for (j=i;j<local_rows;j++) { 2590 owner = row_ownership[index_row]; 2591 index_col = global_indices[j]; 2592 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2593 my_dnz[i] += 1.0; 2594 } else { /* offdiag block */ 2595 my_onz[i] += 1.0; 2596 } 2597 /* same as before, interchanging rows and cols */ 2598 if (i != j) { 2599 owner = row_ownership[index_col]; 2600 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2601 my_dnz[j] += 1.0; 2602 } else { 2603 my_onz[j] += 1.0; 2604 } 2605 } 2606 } 2607 } 2608 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2609 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2610 if (local_rows) { /* multilevel guard */ 2611 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2612 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2613 } 2614 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2615 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2616 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2617 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2618 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2619 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2620 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2621 2622 /* set computed preallocation in dnz and onz */ 2623 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2624 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2625 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2626 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2627 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2628 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2629 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2630 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2631 2632 /* Resize preallocation if overestimated */ 2633 for (i=0;i<lrows;i++) { 2634 dnz[i] = PetscMin(dnz[i],lcols); 2635 onz[i] = PetscMin(onz[i],cols-lcols); 2636 } 2637 /* set preallocation */ 2638 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2639 for (i=0;i<lrows/bs;i++) { 2640 dnz[i] = dnz[i*bs]/bs; 2641 onz[i] = onz[i*bs]/bs; 2642 } 2643 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2644 for (i=0;i<lrows/bs;i++) { 2645 dnz[i] = dnz[i]-i; 2646 } 2647 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2648 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2649 *M = new_mat; 2650 } else { 2651 PetscInt mbs,mrows,mcols; 2652 /* some checks */ 2653 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2654 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2655 if (mrows != rows) { 2656 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2657 } 2658 if (mrows != rows) { 2659 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2660 } 2661 if (mbs != bs) { 2662 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2663 } 2664 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2665 } 2666 /* set local to global mappings */ 2667 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2668 /* Set values */ 2669 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2670 if (isdense) { /* special case for dense local matrices */ 2671 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2672 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2673 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2674 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2675 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2676 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2677 } else { /* very basic values insertion for all other matrix types */ 2678 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2679 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2680 for (i=0;i<local_rows;i++) { 2681 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2682 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2683 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2684 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2685 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2686 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2687 } 2688 } 2689 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2690 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2691 if (isdense) { 2692 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2693 } 2694 PetscFunctionReturn(0); 2695 } 2696 2697 #undef __FUNCT__ 2698 #define __FUNCT__ "MatISSubassemble_Private" 2699 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2700 { 2701 Mat subdomain_adj; 2702 IS new_ranks,ranks_send_to; 2703 MatPartitioning partitioner; 2704 Mat_IS *matis; 2705 PetscInt n_neighs,*neighs,*n_shared,**shared; 2706 PetscInt prank; 2707 PetscMPIInt size,rank,color; 2708 PetscInt *xadj,*adjncy,*oldranks; 2709 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2710 PetscInt i,j,n_subdomains,local_size,threshold=0; 2711 PetscErrorCode ierr; 2712 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2713 PetscSubcomm subcomm; 2714 2715 PetscFunctionBegin; 2716 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2717 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2718 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2719 2720 /* Get info on mapping */ 2721 matis = (Mat_IS*)(mat->data); 2722 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2723 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2724 2725 /* build local CSR graph of subdomains' connectivity */ 2726 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2727 xadj[0] = 0; 2728 xadj[1] = PetscMax(n_neighs-1,0); 2729 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2730 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2731 2732 if (threshold) { 2733 PetscInt* count,min_threshold; 2734 ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr); 2735 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2736 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2737 for (j=0;j<n_shared[i];j++) { 2738 count[shared[i][j]] += 1; 2739 } 2740 } 2741 /* adapt threshold since we dont want to lose significant connections */ 2742 min_threshold = n_neighs; 2743 for (i=1;i<n_neighs;i++) { 2744 for (j=0;j<n_shared[i];j++) { 2745 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2746 } 2747 } 2748 threshold = PetscMax(min_threshold+1,threshold); 2749 xadj[1] = 0; 2750 for (i=1;i<n_neighs;i++) { 2751 for (j=0;j<n_shared[i];j++) { 2752 if (count[shared[i][j]] < threshold) { 2753 adjncy[xadj[1]] = neighs[i]; 2754 adjncy_wgt[xadj[1]] = n_shared[i]; 2755 xadj[1]++; 2756 break; 2757 } 2758 } 2759 } 2760 ierr = PetscFree(count);CHKERRQ(ierr); 2761 } else { 2762 if (xadj[1]) { 2763 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2764 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2765 } 2766 } 2767 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2768 if (use_square) { 2769 for (i=0;i<xadj[1];i++) { 2770 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2771 } 2772 } 2773 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2774 2775 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2776 2777 /* 2778 Restrict work on active processes only. 2779 */ 2780 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2781 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2782 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2783 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2784 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2785 if (color) { 2786 ierr = PetscFree(xadj);CHKERRQ(ierr); 2787 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2788 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2789 } else { 2790 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2791 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2792 prank = rank; 2793 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2794 for (i=0;i<size;i++) { 2795 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2796 } 2797 for (i=0;i<xadj[1];i++) { 2798 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2799 } 2800 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2801 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2802 n_subdomains = (PetscInt)size; 2803 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2804 2805 /* Partition */ 2806 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2807 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2808 if (use_vwgt) { 2809 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2810 v_wgt[0] = local_size; 2811 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2812 } 2813 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2814 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2815 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2816 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2817 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2818 2819 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2820 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2821 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2822 /* clean up */ 2823 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2824 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2825 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2826 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2827 } 2828 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2829 2830 /* assemble parallel IS for sends */ 2831 i = 1; 2832 if (color) i=0; 2833 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2834 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2835 2836 /* get back IS */ 2837 *is_sends = ranks_send_to; 2838 PetscFunctionReturn(0); 2839 } 2840 2841 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2842 2843 #undef __FUNCT__ 2844 #define __FUNCT__ "MatISSubassemble" 2845 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2846 { 2847 Mat local_mat,new_mat; 2848 Mat_IS *matis; 2849 IS is_sends_internal; 2850 PetscInt rows,cols; 2851 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2852 PetscBool ismatis,isdense; 2853 ISLocalToGlobalMapping l2gmap; 2854 PetscInt* l2gmap_indices; 2855 const PetscInt* is_indices; 2856 MatType new_local_type; 2857 MatTypePrivate new_local_type_private; 2858 /* buffers */ 2859 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2860 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2861 /* MPI */ 2862 MPI_Comm comm; 2863 PetscMPIInt n_sends,n_recvs,commsize; 2864 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2865 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2866 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2867 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2868 PetscErrorCode ierr; 2869 2870 PetscFunctionBegin; 2871 /* checks */ 2872 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2873 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2874 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2875 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2876 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2877 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2878 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2879 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2880 PetscValidLogicalCollectiveInt(mat,bs,0); 2881 /* prepare IS for sending if not provided */ 2882 if (!is_sends) { 2883 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2884 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2885 } else { 2886 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2887 is_sends_internal = is_sends; 2888 } 2889 2890 /* get pointer of MATIS data */ 2891 matis = (Mat_IS*)mat->data; 2892 2893 /* get comm */ 2894 comm = PetscObjectComm((PetscObject)mat); 2895 2896 /* compute number of sends */ 2897 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2898 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2899 2900 /* compute number of receives */ 2901 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2902 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 2903 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2904 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2905 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2906 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2907 ierr = PetscFree(iflags);CHKERRQ(ierr); 2908 2909 /* prepare send/receive buffers */ 2910 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 2911 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2912 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 2913 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2914 2915 /* Get data from local mat */ 2916 if (!isdense) { 2917 /* TODO: See below some guidelines on how to prepare the local buffers */ 2918 /* 2919 send_buffer_vals should contain the raw values of the local matrix 2920 send_buffer_idxs should contain: 2921 - MatType_PRIVATE type 2922 - PetscInt size_of_l2gmap 2923 - PetscInt global_row_indices[size_of_l2gmap] 2924 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2925 */ 2926 } else { 2927 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2928 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2929 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 2930 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2931 send_buffer_idxs[1] = i; 2932 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2933 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2934 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2935 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2936 for (i=0;i<n_sends;i++) { 2937 ilengths_vals[is_indices[i]] = len*len; 2938 ilengths_idxs[is_indices[i]] = len+2; 2939 } 2940 } 2941 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2942 buf_size_idxs = 0; 2943 buf_size_vals = 0; 2944 for (i=0;i<n_recvs;i++) { 2945 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2946 buf_size_vals += (PetscInt)olengths_vals[i]; 2947 } 2948 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 2949 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 2950 2951 /* get new tags for clean communications */ 2952 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2953 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2954 2955 /* allocate for requests */ 2956 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 2957 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 2958 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 2959 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 2960 2961 /* communications */ 2962 ptr_idxs = recv_buffer_idxs; 2963 ptr_vals = recv_buffer_vals; 2964 for (i=0;i<n_recvs;i++) { 2965 source_dest = onodes[i]; 2966 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2967 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2968 ptr_idxs += olengths_idxs[i]; 2969 ptr_vals += olengths_vals[i]; 2970 } 2971 for (i=0;i<n_sends;i++) { 2972 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2973 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2974 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2975 } 2976 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2977 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2978 2979 /* assemble new l2g map */ 2980 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2981 ptr_idxs = recv_buffer_idxs; 2982 buf_size_idxs = 0; 2983 for (i=0;i<n_recvs;i++) { 2984 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2985 ptr_idxs += olengths_idxs[i]; 2986 } 2987 ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr); 2988 ptr_idxs = recv_buffer_idxs; 2989 buf_size_idxs = 0; 2990 for (i=0;i<n_recvs;i++) { 2991 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2992 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2993 ptr_idxs += olengths_idxs[i]; 2994 } 2995 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2996 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2997 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2998 2999 /* infer new local matrix type from received local matrices type */ 3000 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3001 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3002 new_local_type_private = MATAIJ_PRIVATE; 3003 new_local_type = MATSEQAIJ; 3004 if (n_recvs) { 3005 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3006 ptr_idxs = recv_buffer_idxs; 3007 for (i=0;i<n_recvs;i++) { 3008 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3009 new_local_type_private = MATAIJ_PRIVATE; 3010 break; 3011 } 3012 ptr_idxs += olengths_idxs[i]; 3013 } 3014 switch (new_local_type_private) { 3015 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 3016 new_local_type = MATSEQAIJ; 3017 bs = 1; 3018 break; 3019 case MATAIJ_PRIVATE: 3020 new_local_type = MATSEQAIJ; 3021 bs = 1; 3022 break; 3023 case MATBAIJ_PRIVATE: 3024 new_local_type = MATSEQBAIJ; 3025 break; 3026 case MATSBAIJ_PRIVATE: 3027 new_local_type = MATSEQSBAIJ; 3028 break; 3029 default: 3030 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3031 break; 3032 } 3033 } 3034 3035 /* create MATIS object */ 3036 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3037 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 3038 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3039 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 3040 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3041 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3042 3043 /* set values */ 3044 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3045 ptr_vals = recv_buffer_vals; 3046 ptr_idxs = recv_buffer_idxs; 3047 for (i=0;i<n_recvs;i++) { 3048 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3049 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3050 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3051 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3052 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3053 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3054 } 3055 ptr_idxs += olengths_idxs[i]; 3056 ptr_vals += olengths_vals[i]; 3057 } 3058 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3059 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3062 3063 { /* check */ 3064 Vec lvec,rvec; 3065 PetscReal infty_error; 3066 3067 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3068 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3069 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3070 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3071 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 3072 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3073 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3074 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3075 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3076 } 3077 3078 /* free workspace */ 3079 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3080 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3081 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3082 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3083 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3084 if (isdense) { 3085 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3086 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3087 } else { 3088 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3089 } 3090 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3091 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3092 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3093 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3094 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3095 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3096 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3097 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3098 ierr = PetscFree(onodes);CHKERRQ(ierr); 3099 /* get back new mat */ 3100 *mat_n = new_mat; 3101 PetscFunctionReturn(0); 3102 } 3103 3104 #undef __FUNCT__ 3105 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3106 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3107 { 3108 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3109 PC_IS *pcis = (PC_IS*)pc->data; 3110 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3111 MatNullSpace CoarseNullSpace=NULL; 3112 ISLocalToGlobalMapping coarse_islg; 3113 IS coarse_is; 3114 PetscInt max_it; 3115 PetscInt im_active=-1,active_procs=-1; 3116 PC pc_temp; 3117 PCType coarse_pc_type; 3118 KSPType coarse_ksp_type; 3119 PetscBool multilevel_requested,multilevel_allowed; 3120 PetscBool setsym,issym,isbddc,isnn,coarse_reuse; 3121 MatStructure matstruct; 3122 PetscErrorCode ierr; 3123 3124 PetscFunctionBegin; 3125 /* Assign global numbering to coarse dofs */ 3126 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3127 PetscInt ocoarse_size; 3128 ocoarse_size = pcbddc->coarse_size; 3129 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3130 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3131 /* see if we can avoid some work */ 3132 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3133 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3134 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3135 coarse_reuse = PETSC_FALSE; 3136 } else { /* we can safely reuse already computed coarse matrix */ 3137 coarse_reuse = PETSC_TRUE; 3138 } 3139 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3140 coarse_reuse = PETSC_FALSE; 3141 } 3142 } else { /* primal space has not been changed, so we can reuse coarse matrix */ 3143 coarse_reuse = PETSC_TRUE; 3144 } 3145 3146 /* infer some info from user */ 3147 issym = PETSC_FALSE; 3148 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3149 multilevel_allowed = PETSC_FALSE; 3150 multilevel_requested = PETSC_FALSE; 3151 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3152 if (multilevel_requested) { 3153 /* count "active processes" */ 3154 im_active = !!(pcis->n); 3155 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3156 if (active_procs/pcbddc->coarsening_ratio < 2) { 3157 multilevel_allowed = PETSC_FALSE; 3158 } else { 3159 multilevel_allowed = PETSC_TRUE; 3160 } 3161 } 3162 3163 /* set defaults for coarse KSP and PC */ 3164 if (multilevel_allowed) { 3165 if (issym) { 3166 coarse_ksp_type = KSPRICHARDSON; 3167 } else { 3168 coarse_ksp_type = KSPCHEBYSHEV; 3169 } 3170 coarse_pc_type = PCBDDC; 3171 } else { 3172 coarse_ksp_type = KSPPREONLY; 3173 coarse_pc_type = PCREDUNDANT; 3174 } 3175 3176 /* create the coarse KSP object only once with defaults */ 3177 if (!pcbddc->coarse_ksp) { 3178 char prefix[256],str_level[3]; 3179 size_t len; 3180 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3181 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3182 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3183 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3184 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3185 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3186 /* prefix */ 3187 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3188 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3189 if (!pcbddc->current_level) { 3190 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3191 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3192 } else { 3193 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3194 if (pcbddc->current_level>1) len -= 2; 3195 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3196 *(prefix+len)='\0'; 3197 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3198 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3199 } 3200 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3201 } 3202 /* allow user customization */ 3203 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3204 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3205 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3206 3207 /* get some info after set from options */ 3208 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3209 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3210 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3211 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3212 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3213 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3214 isbddc = PETSC_FALSE; 3215 } 3216 3217 /* propagate BDDC info to the next level */ 3218 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3219 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3220 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3221 3222 /* creates temporary MATIS object for coarse matrix */ 3223 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3224 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3225 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3226 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3227 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3228 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3229 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3230 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3231 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3232 3233 /* assemble coarse matrix */ 3234 if (isbddc || isnn) { 3235 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3236 } else { 3237 if (coarse_reuse) { 3238 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3239 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3240 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3241 } else { 3242 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3243 } 3244 } 3245 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3246 3247 /* create local to global scatters for coarse problem */ 3248 if (pcbddc->new_primal_space) { 3249 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3250 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3251 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3252 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3253 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3254 } 3255 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3256 3257 /* propagate symmetry info to coarse matrix */ 3258 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3259 3260 /* Compute coarse null space (special handling by BDDC only) */ 3261 if (pcbddc->NullSpace) { 3262 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3263 if (isbddc) { 3264 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3265 } else { 3266 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3267 } 3268 } 3269 3270 /* set operators */ 3271 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3272 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3273 3274 /* additional KSP customization */ 3275 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3276 if (max_it < 5) { 3277 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3278 } 3279 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3280 3281 3282 /* print some info if requested */ 3283 if (pcbddc->dbg_flag) { 3284 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3285 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3286 if (!multilevel_allowed) { 3287 if (multilevel_requested) { 3288 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3289 } else if (pcbddc->max_levels) { 3290 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3291 } 3292 } 3293 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3294 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3295 } 3296 3297 /* setup coarse ksp */ 3298 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3299 if (pcbddc->dbg_flag) { 3300 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3301 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3302 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3303 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3304 } 3305 3306 /* Check coarse problem if requested */ 3307 if (pcbddc->dbg_flag) { 3308 KSP check_ksp; 3309 KSPType check_ksp_type; 3310 PC check_pc; 3311 Vec check_vec; 3312 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3313 PetscInt its; 3314 PetscBool ispreonly,compute; 3315 3316 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3317 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3318 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3319 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3320 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3321 if (ispreonly) { 3322 check_ksp_type = KSPPREONLY; 3323 compute = PETSC_FALSE; 3324 } else { 3325 if (issym) check_ksp_type = KSPCG; 3326 else check_ksp_type = KSPGMRES; 3327 compute = PETSC_TRUE; 3328 } 3329 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3330 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3331 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3332 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3333 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3334 /* create random vec */ 3335 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3336 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3337 if (CoarseNullSpace) { 3338 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3339 } 3340 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3341 /* solve coarse problem */ 3342 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3343 if (CoarseNullSpace) { 3344 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3345 } 3346 /* check coarse problem residual error */ 3347 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3348 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3349 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3350 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3351 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3352 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3353 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3354 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3355 /* get eigenvalue estimation if preonly has not been requested */ 3356 if (!ispreonly) { 3357 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3358 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3359 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3360 } 3361 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3362 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3363 } 3364 /* free memory */ 3365 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3366 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3367 PetscFunctionReturn(0); 3368 } 3369 3370 #undef __FUNCT__ 3371 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3372 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3373 { 3374 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3375 PC_IS* pcis = (PC_IS*)pc->data; 3376 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3377 PetscInt i,coarse_size; 3378 PetscInt *local_primal_indices; 3379 PetscErrorCode ierr; 3380 3381 PetscFunctionBegin; 3382 /* Compute global number of coarse dofs */ 3383 if (!pcbddc->primal_indices_local_idxs) { 3384 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 3385 } 3386 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3387 3388 /* check numbering */ 3389 if (pcbddc->dbg_flag) { 3390 PetscScalar coarsesum,*array; 3391 3392 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3393 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3394 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3395 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3396 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3397 for (i=0;i<pcbddc->local_primal_size;i++) { 3398 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3399 } 3400 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3401 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3402 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3403 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3404 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3405 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3406 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3407 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3408 for (i=0;i<pcis->n;i++) { 3409 if (array[i] == 1.0) { 3410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3411 } 3412 } 3413 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3414 for (i=0;i<pcis->n;i++) { 3415 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3416 } 3417 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3418 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3419 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3420 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3421 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3422 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3423 if (pcbddc->dbg_flag > 1) { 3424 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3425 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3426 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3427 for (i=0;i<pcbddc->local_primal_size;i++) { 3428 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3429 } 3430 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3431 } 3432 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3433 } 3434 /* get back data */ 3435 *coarse_size_n = coarse_size; 3436 *local_primal_indices_n = local_primal_indices; 3437 PetscFunctionReturn(0); 3438 } 3439 3440