1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "PCBDDCCreateWorkVectors" 110 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 111 { 112 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 113 PC_IS *pcis = (PC_IS*)pc->data; 114 VecType impVecType; 115 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 120 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 121 local_primal_size = n_constraints+n_vertices; 122 n_R = pcis->n-n_vertices; 123 /* local work vectors */ 124 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 125 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 126 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 127 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 128 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 129 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 130 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 131 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 132 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 133 if (n_constraints) { 134 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 135 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 136 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 137 } 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "PCBDDCSetUpCorrection" 143 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 144 { 145 PetscErrorCode ierr; 146 /* pointers to pcis and pcbddc */ 147 PC_IS* pcis = (PC_IS*)pc->data; 148 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 149 /* submatrices of local problem */ 150 Mat A_RV,A_VR,A_VV; 151 /* working matrices */ 152 Mat M1,M2,M3,C_CR; 153 /* working vectors */ 154 Vec vec1_C,vec2_C,vec1_V,vec2_V; 155 /* additional working stuff */ 156 IS is_aux; 157 ISLocalToGlobalMapping BtoNmap; 158 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 159 const PetscScalar *array,*row_cmat_values; 160 const PetscInt *row_cmat_indices,*idx_R_local; 161 PetscInt *vertices,*idx_V_B,*auxindices; 162 PetscInt n_vertices,n_constraints,size_of_constraint; 163 PetscInt i,j,n_R,n_D,n_B; 164 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 165 /* Vector and matrix types */ 166 VecType impVecType; 167 MatType impMatType; 168 /* some shortcuts to scalars */ 169 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 170 /* for debugging purposes */ 171 PetscReal *coarsefunctions_errors,*constraints_errors; 172 173 PetscFunctionBegin; 174 /* get number of vertices and their local indices */ 175 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 176 n_constraints = pcbddc->local_primal_size-n_vertices; 177 /* Set Non-overlapping dimensions */ 178 n_B = pcis->n_B; n_D = pcis->n - n_B; 179 n_R = pcis->n-n_vertices; 180 181 /* Set types for local objects needed by BDDC precondtioner */ 182 impMatType = MATSEQDENSE; 183 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 184 185 /* Allocating some extra storage just to be safe */ 186 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 187 for (i=0;i<pcis->n;i++) auxindices[i]=i; 188 189 /* vertices in boundary numbering */ 190 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 191 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 192 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 193 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 194 if (i != n_vertices) { 195 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 196 } 197 198 /* some work vectors on vertices and/or constraints */ 199 if (n_vertices) { 200 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 201 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 202 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 203 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 204 } 205 if (n_constraints) { 206 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 207 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 208 } 209 210 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 211 if (n_constraints) { 212 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 213 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 214 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 215 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 216 217 /* Extract constraints on R nodes: C_{CR} */ 218 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 219 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 220 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 221 222 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 223 for (i=0;i<n_constraints;i++) { 224 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 225 /* Get row of constraint matrix in R numbering */ 226 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 227 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 228 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 229 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 230 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 231 /* Solve for row of constraint matrix in R numbering */ 232 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 233 /* Set values in local_auxmat2 */ 234 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 235 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 236 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 237 } 238 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 239 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 240 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 241 242 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 243 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 244 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 245 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 246 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 247 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(M1);CHKERRQ(ierr); 249 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 250 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 251 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 252 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 253 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 254 ierr = MatDestroy(&M2);CHKERRQ(ierr); 255 ierr = MatDestroy(&M3);CHKERRQ(ierr); 256 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 257 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 258 } 259 260 /* Get submatrices from subdomain matrix */ 261 if (n_vertices) { 262 PetscInt ibs,iibs,mbs; 263 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 264 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 265 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 266 ierr = ISGetBlockSize(is_aux,&iibs);CHKERRQ(ierr); 267 if (ibs != mbs || iibs != mbs) { /* TODO BAIJ */ 268 Mat newmat; 269 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 270 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 271 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 272 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 273 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 274 } else { 275 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 276 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 277 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 278 } 279 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 280 } 281 282 /* Matrix of coarse basis functions (local) */ 283 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 284 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 285 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 286 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 287 if (pcbddc->switch_static || pcbddc->dbg_flag) { 288 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 289 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 290 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 291 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 292 } 293 294 if (pcbddc->dbg_flag) { 295 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 296 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 297 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 298 } 299 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 300 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 301 302 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 303 304 /* vertices */ 305 for (i=0;i<n_vertices;i++) { 306 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 307 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 308 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 309 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 310 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 311 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 312 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 313 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 314 if (n_constraints) { 315 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 316 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 317 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 318 } 319 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 320 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 321 322 /* Set values in coarse basis function and subdomain part of coarse_mat */ 323 /* coarse basis functions */ 324 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 325 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 326 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 327 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 328 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 329 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 330 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 331 if (pcbddc->switch_static || pcbddc->dbg_flag) { 332 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 333 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 334 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 335 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 336 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 337 } 338 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 339 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 340 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 341 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 342 if (n_constraints) { 343 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 344 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 345 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 346 } 347 348 /* check */ 349 if (pcbddc->dbg_flag) { 350 /* assemble subdomain vector on local nodes */ 351 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 352 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 353 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 354 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 355 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 356 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 357 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 358 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 359 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 360 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 361 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 362 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 363 if (n_constraints) { 364 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 365 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 366 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 367 } 368 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 369 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 370 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 371 /* check saddle point solution */ 372 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 373 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 374 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 375 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 376 /* shift by the identity matrix */ 377 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 378 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 379 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 380 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 381 } 382 } 383 384 /* constraints */ 385 for (i=0;i<n_constraints;i++) { 386 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 387 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 388 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 389 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 390 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 391 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 392 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 393 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 394 if (n_vertices) { 395 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 396 } 397 /* Set values in coarse basis function and subdomain part of coarse_mat */ 398 /* coarse basis functions */ 399 j = i+n_vertices; /* don't touch this! */ 400 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 401 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 404 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 405 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 406 if (pcbddc->switch_static || pcbddc->dbg_flag) { 407 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 408 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 409 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 410 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 412 } 413 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 414 if (n_vertices) { 415 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 416 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 417 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 418 } 419 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 420 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 421 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 422 423 if (pcbddc->dbg_flag) { 424 /* assemble subdomain vector on nodes */ 425 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 426 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 427 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 428 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 429 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 430 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 431 /* assemble subdomain vector of lagrange multipliers */ 432 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 433 if (n_vertices) { 434 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 437 } 438 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 439 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 440 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 441 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 442 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 443 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 444 /* check saddle point solution */ 445 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 446 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 447 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 448 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 449 /* shift by the identity matrix */ 450 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 451 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 452 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 453 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 454 } 455 } 456 /* call assembling routines for local coarse basis */ 457 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 458 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 459 if (pcbddc->switch_static || pcbddc->dbg_flag) { 460 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 461 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 462 } 463 464 /* compute other basis functions for non-symmetric problems */ 465 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 466 if (!setsym || (setsym && !issym)) { 467 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 468 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 469 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 470 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 471 if (pcbddc->switch_static || pcbddc->dbg_flag) { 472 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 473 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 474 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 475 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 476 } 477 for (i=0;i<pcbddc->local_primal_size;i++) { 478 if (n_constraints) { 479 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 480 for (j=0;j<n_constraints;j++) { 481 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 482 } 483 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 484 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 485 } 486 if (i<n_vertices) { 487 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 488 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 489 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 490 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 491 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 492 if (n_constraints) { 493 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 494 } 495 } else { 496 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 497 } 498 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 499 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 500 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 501 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 502 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 503 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 504 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 505 if (i<n_vertices) { 506 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 507 } 508 if (pcbddc->switch_static || pcbddc->dbg_flag) { 509 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 510 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 511 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 512 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 514 } 515 516 if (pcbddc->dbg_flag) { 517 /* assemble subdomain vector on nodes */ 518 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 519 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 520 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 521 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 522 if (i<n_vertices) { 523 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 524 } 525 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 526 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 527 /* assemble subdomain vector of lagrange multipliers */ 528 for (j=0;j<pcbddc->local_primal_size;j++) { 529 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 530 } 531 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 532 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 533 /* check saddle point solution */ 534 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 535 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 536 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 537 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 538 /* shift by the identity matrix */ 539 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 540 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 541 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 542 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 543 } 544 } 545 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 547 if (pcbddc->switch_static || pcbddc->dbg_flag) { 548 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 549 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 } 551 } 552 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 553 /* Checking coarse_sub_mat and coarse basis functios */ 554 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 555 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 556 if (pcbddc->dbg_flag) { 557 Mat coarse_sub_mat; 558 Mat AUXMAT,TM1,TM2,TM3,TM4; 559 Mat coarse_phi_D,coarse_phi_B; 560 Mat coarse_psi_D,coarse_psi_B; 561 Mat A_II,A_BB,A_IB,A_BI; 562 MatType checkmattype=MATSEQAIJ; 563 PetscReal real_value; 564 565 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 566 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 567 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 568 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 569 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 570 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 571 if (pcbddc->coarse_psi_B) { 572 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 573 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 574 } 575 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 576 577 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 578 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 579 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 580 if (pcbddc->coarse_psi_B) { 581 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 582 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 583 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 584 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 585 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 586 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 587 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 588 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 589 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 590 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 591 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 592 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 593 } else { 594 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 595 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 596 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 597 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 598 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 599 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 600 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 601 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 602 } 603 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 604 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 605 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 606 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 607 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 608 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 612 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 613 for (i=0;i<pcbddc->local_primal_size;i++) { 614 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 615 } 616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 617 for (i=0;i<pcbddc->local_primal_size;i++) { 618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 619 } 620 if (pcbddc->coarse_psi_B) { 621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 622 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 624 } 625 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 626 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 628 } 629 } 630 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 631 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 632 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 633 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 634 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 635 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 636 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 637 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 638 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 639 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 640 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 641 if (pcbddc->coarse_psi_B) { 642 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 643 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 644 } 645 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 646 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 647 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 648 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 649 } 650 /* free memory */ 651 if (n_vertices) { 652 ierr = PetscFree(vertices);CHKERRQ(ierr); 653 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 654 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 655 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 656 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 657 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 658 } 659 if (n_constraints) { 660 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 661 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 662 ierr = MatDestroy(&M1);CHKERRQ(ierr); 663 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 664 } 665 ierr = PetscFree(auxindices);CHKERRQ(ierr); 666 /* get back data */ 667 *coarse_submat_vals_n = coarse_submat_vals; 668 PetscFunctionReturn(0); 669 } 670 671 #undef __FUNCT__ 672 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 673 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 674 { 675 PC_IS* pcis = (PC_IS*)(pc->data); 676 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 677 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 678 /* manage repeated solves */ 679 MatReuse reuse; 680 MatStructure matstruct; 681 PetscErrorCode ierr; 682 683 PetscFunctionBegin; 684 /* get mat flags */ 685 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 686 reuse = MAT_INITIAL_MATRIX; 687 if (pc->setupcalled) { 688 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 689 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 690 if (matstruct == SAME_NONZERO_PATTERN) { 691 reuse = MAT_REUSE_MATRIX; 692 } else { 693 reuse = MAT_INITIAL_MATRIX; 694 } 695 } 696 if (reuse == MAT_INITIAL_MATRIX) { 697 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 701 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 702 } 703 704 /* transform local matrices if needed */ 705 if (pcbddc->use_change_of_basis) { 706 Mat change_mat_all; 707 PetscScalar *row_cmat_values; 708 PetscInt *row_cmat_indices; 709 PetscInt *nnz,*is_indices,*temp_indices; 710 PetscInt i,j,k,n_D,n_B; 711 712 /* Get Non-overlapping dimensions */ 713 n_B = pcis->n_B; 714 n_D = pcis->n-n_B; 715 716 /* compute nonzero structure of change of basis on all local nodes */ 717 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 718 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 719 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 720 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 721 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 722 k=1; 723 for (i=0;i<n_B;i++) { 724 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 725 nnz[is_indices[i]]=j; 726 if (k < j) k = j; 727 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 728 } 729 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 730 /* assemble change of basis matrix on the whole set of local dofs */ 731 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 732 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 733 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 734 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 735 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 736 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 737 for (i=0;i<n_D;i++) { 738 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 739 } 740 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 741 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 742 for (i=0;i<n_B;i++) { 743 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 744 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 745 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 746 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 747 } 748 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 749 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 750 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 751 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 752 if (i==1) { 753 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 754 } else { 755 Mat work_mat; 756 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 757 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 758 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 759 } 760 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 761 ierr = PetscFree(nnz);CHKERRQ(ierr); 762 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 763 } else { 764 /* without change of basis, the local matrix is unchanged */ 765 if (!pcbddc->local_mat) { 766 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 767 pcbddc->local_mat = matis->A; 768 } 769 } 770 771 /* get submatrices */ 772 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 773 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 774 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 775 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 776 PetscFunctionReturn(0); 777 } 778 779 #undef __FUNCT__ 780 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 781 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 782 { 783 PC_IS* pcis = (PC_IS*)(pc->data); 784 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 785 IS is_aux1,is_aux2; 786 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 787 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 788 PetscBT bitmask; 789 PetscErrorCode ierr; 790 791 PetscFunctionBegin; 792 /* Set Non-overlapping dimensions */ 793 n_B = pcis->n_B; n_D = pcis->n - n_B; 794 /* get vertex indices from constraint matrix */ 795 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 796 /* Set number of constraints */ 797 n_constraints = pcbddc->local_primal_size-n_vertices; 798 /* create auxiliary bitmask */ 799 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 800 for (i=0;i<n_vertices;i++) { 801 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 802 } 803 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 804 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 805 for (i=0, n_R=0; i<pcis->n; i++) { 806 if (!PetscBTLookup(bitmask,i)) { 807 idx_R_local[n_R] = i; 808 n_R++; 809 } 810 } 811 ierr = PetscFree(vertices);CHKERRQ(ierr); 812 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 813 814 /* print some info if requested */ 815 if (pcbddc->dbg_flag) { 816 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 817 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 818 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 819 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 820 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 821 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 822 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 823 } 824 825 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 826 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 827 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 828 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 829 for (i=0; i<n_D; i++) { 830 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 831 } 832 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 833 for (i=0, j=0; i<n_R; i++) { 834 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 835 aux_array1[j++] = i; 836 } 837 } 838 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 839 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 840 for (i=0, j=0; i<n_B; i++) { 841 if (!PetscBTLookup(bitmask,is_indices[i])) { 842 aux_array2[j++] = i; 843 } 844 } 845 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 846 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 847 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 848 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 849 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 850 851 if (pcbddc->switch_static || pcbddc->dbg_flag) { 852 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 853 for (i=0, j=0; i<n_R; i++) { 854 if (PetscBTLookup(bitmask,idx_R_local[i])) { 855 aux_array1[j++] = i; 856 } 857 } 858 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 859 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 860 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 861 } 862 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 869 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 870 { 871 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 872 PC_IS *pcis = (PC_IS*)pc->data; 873 PC pc_temp; 874 Mat A_RR; 875 Vec vec1,vec2,vec3; 876 MatStructure matstruct; 877 PetscScalar m_one = -1.0; 878 PetscReal value; 879 PetscInt n_D,n_R,ibs,mbs; 880 PetscBool use_exact,use_exact_reduced; 881 PetscErrorCode ierr; 882 /* prefixes stuff */ 883 char dir_prefix[256],neu_prefix[256],str_level[3]; 884 size_t len; 885 886 PetscFunctionBegin; 887 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 888 889 /* compute prefixes */ 890 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 891 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 892 if (!pcbddc->current_level) { 893 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 894 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 895 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 896 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 897 } else { 898 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 899 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 900 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 901 len -= 15; /* remove "pc_bddc_coarse_" */ 902 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 903 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 904 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 905 *(dir_prefix+len)='\0'; 906 *(neu_prefix+len)='\0'; 907 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 908 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 909 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 910 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 911 } 912 913 /* DIRICHLET PROBLEM */ 914 /* Matrix for Dirichlet problem is pcis->A_II */ 915 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 916 if (!pcbddc->ksp_D) { /* create object if not yet build */ 917 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 918 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 919 /* default */ 920 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 921 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 922 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 923 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 924 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 925 } 926 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 927 /* Allow user's customization */ 928 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 929 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 930 if (!n_D) { 931 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 932 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 933 } 934 /* Set Up KSP for Dirichlet problem of BDDC */ 935 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 936 /* set ksp_D into pcis data */ 937 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 938 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 939 pcis->ksp_D = pcbddc->ksp_D; 940 941 /* NEUMANN PROBLEM */ 942 /* Matrix for Neumann problem is A_RR -> we need to create it */ 943 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 944 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 945 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 946 if (ibs != mbs) { /* TODO BAIJ: see if this can be avoided */ 947 Mat newmat; 948 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 949 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 950 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 951 } else { 952 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 953 } 954 if (!pcbddc->ksp_R) { /* create object if not yet build */ 955 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 956 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 957 /* default */ 958 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 959 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 960 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 961 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 962 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 963 } 964 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 965 /* Allow user's customization */ 966 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 967 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 968 if (!n_R) { 969 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 970 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 971 } 972 /* Set Up KSP for Neumann problem of BDDC */ 973 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 974 975 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 976 977 /* Dirichlet */ 978 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 979 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 980 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 981 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 982 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 983 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 984 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 985 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 986 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 987 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 988 /* need to be adapted? */ 989 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 990 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 991 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 992 /* print info */ 993 if (pcbddc->dbg_flag) { 994 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 995 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 996 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 999 } 1000 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1001 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1002 } 1003 1004 /* Neumann */ 1005 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1006 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1007 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1008 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1009 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1010 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1011 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1012 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1013 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1014 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1015 /* need to be adapted? */ 1016 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1017 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1018 /* print info */ 1019 if (pcbddc->dbg_flag) { 1020 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1021 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1022 } 1023 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1024 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1025 } 1026 /* free Neumann problem's matrix */ 1027 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1028 PetscFunctionReturn(0); 1029 } 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1033 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1034 { 1035 PetscErrorCode ierr; 1036 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1037 1038 PetscFunctionBegin; 1039 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1040 if (pcbddc->local_auxmat1) { 1041 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1042 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1043 } 1044 PetscFunctionReturn(0); 1045 } 1046 1047 #undef __FUNCT__ 1048 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1049 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1050 { 1051 PetscErrorCode ierr; 1052 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1053 PC_IS* pcis = (PC_IS*) (pc->data); 1054 const PetscScalar zero = 0.0; 1055 1056 PetscFunctionBegin; 1057 /* Application of PHI^T (or PSI^T) */ 1058 if (pcbddc->coarse_psi_B) { 1059 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1060 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1061 } else { 1062 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1063 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1064 } 1065 /* Scatter data of coarse_rhs */ 1066 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1067 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1068 1069 /* Local solution on R nodes */ 1070 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1071 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1072 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1073 if (pcbddc->switch_static) { 1074 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1075 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1076 } 1077 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1078 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1079 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1080 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1081 if (pcbddc->switch_static) { 1082 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1083 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1084 } 1085 1086 /* Coarse solution */ 1087 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1088 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1089 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1090 } 1091 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1092 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1093 1094 /* Sum contributions from two levels */ 1095 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1096 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1102 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1103 { 1104 PetscErrorCode ierr; 1105 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1106 1107 PetscFunctionBegin; 1108 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1114 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1115 { 1116 PetscErrorCode ierr; 1117 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1118 1119 PetscFunctionBegin; 1120 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 /* uncomment for testing purposes */ 1125 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1128 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1129 { 1130 PetscErrorCode ierr; 1131 PC_IS* pcis = (PC_IS*)(pc->data); 1132 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1133 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1134 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1135 MatType impMatType=MATSEQAIJ; 1136 /* one and zero */ 1137 PetscScalar one=1.0,zero=0.0; 1138 /* space to store constraints and their local indices */ 1139 PetscScalar *temp_quadrature_constraint; 1140 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1141 /* iterators */ 1142 PetscInt i,j,k,total_counts,temp_start_ptr; 1143 /* stuff to store connected components stored in pcbddc->mat_graph */ 1144 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1145 PetscInt n_ISForFaces,n_ISForEdges; 1146 /* near null space stuff */ 1147 MatNullSpace nearnullsp; 1148 const Vec *nearnullvecs; 1149 Vec *localnearnullsp; 1150 PetscBool nnsp_has_cnst; 1151 PetscInt nnsp_size; 1152 PetscScalar *array; 1153 /* BLAS integers */ 1154 PetscBLASInt lwork,lierr; 1155 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1156 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1157 /* LAPACK working arrays for SVD or POD */ 1158 PetscBool skip_lapack; 1159 PetscScalar *work; 1160 PetscReal *singular_vals; 1161 #if defined(PETSC_USE_COMPLEX) 1162 PetscReal *rwork; 1163 #endif 1164 #if defined(PETSC_MISSING_LAPACK_GESVD) 1165 PetscBLASInt Blas_one_2=1; 1166 PetscScalar *temp_basis,*correlation_mat; 1167 #else 1168 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1169 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1170 #endif 1171 /* change of basis */ 1172 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1173 PetscBool boolforchange,qr_needed; 1174 PetscBT touched,change_basis,qr_needed_idx; 1175 /* auxiliary stuff */ 1176 PetscInt *nnz,*is_indices,*local_to_B; 1177 /* some quantities */ 1178 PetscInt n_vertices,total_primal_vertices; 1179 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1180 1181 1182 PetscFunctionBegin; 1183 /* Get index sets for faces, edges and vertices from graph */ 1184 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1185 pcbddc->use_vertices = PETSC_TRUE; 1186 } 1187 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1188 /* HACK: provide functions to set change of basis */ 1189 if (!ISForVertices && pcbddc->NullSpace) { 1190 pcbddc->use_change_of_basis = PETSC_TRUE; 1191 pcbddc->use_change_on_faces = PETSC_FALSE; 1192 } 1193 /* print some info */ 1194 if (pcbddc->dbg_flag) { 1195 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1196 i = 0; 1197 if (ISForVertices) { 1198 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1199 } 1200 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1201 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1202 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1203 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1204 } 1205 /* check if near null space is attached to global mat */ 1206 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1207 if (nearnullsp) { 1208 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1209 } else { /* if near null space is not provided BDDC uses constants by default */ 1210 nnsp_size = 0; 1211 nnsp_has_cnst = PETSC_TRUE; 1212 } 1213 /* get max number of constraints on a single cc */ 1214 max_constraints = nnsp_size; 1215 if (nnsp_has_cnst) max_constraints++; 1216 1217 /* 1218 Evaluate maximum storage size needed by the procedure 1219 - temp_indices will contain start index of each constraint stored as follows 1220 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1221 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1222 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1223 */ 1224 total_counts = n_ISForFaces+n_ISForEdges; 1225 total_counts *= max_constraints; 1226 n_vertices = 0; 1227 if (ISForVertices) { 1228 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1229 } 1230 total_counts += n_vertices; 1231 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1232 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1233 total_counts = 0; 1234 max_size_of_constraint = 0; 1235 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1236 if (i<n_ISForEdges) { 1237 used_IS = &ISForEdges[i]; 1238 } else { 1239 used_IS = &ISForFaces[i-n_ISForEdges]; 1240 } 1241 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1242 total_counts += j; 1243 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1244 } 1245 total_counts *= max_constraints; 1246 total_counts += n_vertices; 1247 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1248 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1249 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1250 /* local to boundary numbering */ 1251 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1252 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1253 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1254 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1255 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1256 /* get local part of global near null space vectors */ 1257 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1258 for (k=0;k<nnsp_size;k++) { 1259 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1260 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1262 } 1263 1264 /* whether or not to skip lapack calls */ 1265 skip_lapack = PETSC_TRUE; 1266 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1267 1268 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1269 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1270 PetscScalar temp_work; 1271 #if defined(PETSC_MISSING_LAPACK_GESVD) 1272 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1273 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1274 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1275 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1276 #if defined(PETSC_USE_COMPLEX) 1277 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1278 #endif 1279 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1280 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1281 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1282 lwork = -1; 1283 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1284 #if !defined(PETSC_USE_COMPLEX) 1285 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1286 #else 1287 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1288 #endif 1289 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1290 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1291 #else /* on missing GESVD */ 1292 /* SVD */ 1293 PetscInt max_n,min_n; 1294 max_n = max_size_of_constraint; 1295 min_n = max_constraints; 1296 if (max_size_of_constraint < max_constraints) { 1297 min_n = max_size_of_constraint; 1298 max_n = max_constraints; 1299 } 1300 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1301 #if defined(PETSC_USE_COMPLEX) 1302 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1303 #endif 1304 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1305 lwork = -1; 1306 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1307 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1308 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1309 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1310 #if !defined(PETSC_USE_COMPLEX) 1311 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1312 #else 1313 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1314 #endif 1315 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1316 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1317 #endif /* on missing GESVD */ 1318 /* Allocate optimal workspace */ 1319 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1320 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1321 } 1322 /* Now we can loop on constraining sets */ 1323 total_counts = 0; 1324 temp_indices[0] = 0; 1325 /* vertices */ 1326 if (ISForVertices) { 1327 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1328 if (nnsp_has_cnst) { /* consider all vertices */ 1329 for (i=0;i<n_vertices;i++) { 1330 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1331 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1332 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1333 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1334 total_counts++; 1335 } 1336 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1337 PetscBool used_vertex; 1338 for (i=0;i<n_vertices;i++) { 1339 used_vertex = PETSC_FALSE; 1340 k = 0; 1341 while (!used_vertex && k<nnsp_size) { 1342 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1343 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1344 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1345 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1346 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1347 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1348 total_counts++; 1349 used_vertex = PETSC_TRUE; 1350 } 1351 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1352 k++; 1353 } 1354 } 1355 } 1356 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1357 n_vertices = total_counts; 1358 } 1359 1360 /* edges and faces */ 1361 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1362 if (i<n_ISForEdges) { 1363 used_IS = &ISForEdges[i]; 1364 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1365 } else { 1366 used_IS = &ISForFaces[i-n_ISForEdges]; 1367 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1368 } 1369 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1370 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1371 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1372 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1373 /* change of basis should not be performed on local periodic nodes */ 1374 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1375 if (nnsp_has_cnst) { 1376 PetscScalar quad_value; 1377 temp_constraints++; 1378 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1379 for (j=0;j<size_of_constraint;j++) { 1380 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1381 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1382 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1383 } 1384 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1385 if (boolforchange) { 1386 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1387 } 1388 total_counts++; 1389 } 1390 for (k=0;k<nnsp_size;k++) { 1391 PetscReal real_value; 1392 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1393 for (j=0;j<size_of_constraint;j++) { 1394 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1395 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1396 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1397 } 1398 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1399 /* check if array is null on the connected component */ 1400 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1401 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1402 if (real_value > 0.0) { /* keep indices and values */ 1403 temp_constraints++; 1404 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1405 if (boolforchange) { 1406 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1407 } 1408 total_counts++; 1409 } 1410 } 1411 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1412 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1413 if (!pcbddc->use_nnsp_true) { 1414 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1415 1416 #if defined(PETSC_MISSING_LAPACK_GESVD) 1417 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1418 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1419 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1420 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1421 from that computed using LAPACKgesvd 1422 -> This is due to a different computation of eigenvectors in LAPACKheev 1423 -> The quality of the POD-computed basis will be the same */ 1424 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1425 /* Store upper triangular part of correlation matrix */ 1426 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1427 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1428 for (j=0;j<temp_constraints;j++) { 1429 for (k=0;k<j+1;k++) { 1430 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1431 } 1432 } 1433 /* compute eigenvalues and eigenvectors of correlation matrix */ 1434 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1435 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1436 #if !defined(PETSC_USE_COMPLEX) 1437 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1438 #else 1439 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1440 #endif 1441 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1442 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1443 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1444 j=0; 1445 while (j < temp_constraints && singular_vals[j] < tol) j++; 1446 total_counts=total_counts-j; 1447 /* scale and copy POD basis into used quadrature memory */ 1448 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1449 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1450 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1451 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1452 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1453 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1454 if (j<temp_constraints) { 1455 PetscInt ii; 1456 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1457 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1458 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1459 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1460 for (k=0;k<temp_constraints-j;k++) { 1461 for (ii=0;ii<size_of_constraint;ii++) { 1462 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1463 } 1464 } 1465 } 1466 #else /* on missing GESVD */ 1467 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1468 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1469 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1470 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1471 #if !defined(PETSC_USE_COMPLEX) 1472 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1473 #else 1474 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1475 #endif 1476 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1477 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1478 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1479 k = temp_constraints; 1480 if (k > size_of_constraint) k = size_of_constraint; 1481 j = 0; 1482 while (j < k && singular_vals[k-j-1] < tol) j++; 1483 total_counts = total_counts-temp_constraints+k-j; 1484 #endif /* on missing GESVD */ 1485 } 1486 } 1487 /* free index sets of faces, edges and vertices */ 1488 for (i=0;i<n_ISForFaces;i++) { 1489 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1490 } 1491 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1492 for (i=0;i<n_ISForEdges;i++) { 1493 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1494 } 1495 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1496 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1497 1498 /* free workspace */ 1499 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1500 ierr = PetscFree(work);CHKERRQ(ierr); 1501 #if defined(PETSC_USE_COMPLEX) 1502 ierr = PetscFree(rwork);CHKERRQ(ierr); 1503 #endif 1504 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1505 #if defined(PETSC_MISSING_LAPACK_GESVD) 1506 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1507 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1508 #endif 1509 } 1510 for (k=0;k<nnsp_size;k++) { 1511 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1512 } 1513 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1514 1515 /* set quantities in pcbddc data structure */ 1516 /* n_vertices defines the number of subdomain corners in the primal space */ 1517 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1518 pcbddc->local_primal_size = total_counts; 1519 pcbddc->n_vertices = n_vertices; 1520 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1521 1522 /* Create constraint matrix */ 1523 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1524 /* so we need to set it up properly either with or without change of basis */ 1525 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1526 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1527 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1528 /* array to compute a local numbering of constraints : vertices first then constraints */ 1529 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1530 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1531 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1532 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1533 /* auxiliary stuff for basis change */ 1534 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1535 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1536 1537 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1538 total_primal_vertices=0; 1539 for (i=0;i<pcbddc->local_primal_size;i++) { 1540 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1541 if (size_of_constraint == 1) { 1542 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1543 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1544 aux_primal_minloc[total_primal_vertices]=0; 1545 total_primal_vertices++; 1546 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1547 PetscInt min_loc,min_index; 1548 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1549 /* find first untouched local node */ 1550 k = 0; 1551 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1552 min_index = global_indices[k]; 1553 min_loc = k; 1554 /* search the minimum among global nodes already untouched on the cc */ 1555 for (k=1;k<size_of_constraint;k++) { 1556 /* there can be more than one constraint on a single connected component */ 1557 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1558 min_index = global_indices[k]; 1559 min_loc = k; 1560 } 1561 } 1562 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1563 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1564 aux_primal_minloc[total_primal_vertices]=min_loc; 1565 total_primal_vertices++; 1566 } 1567 } 1568 /* determine if a QR strategy is needed for change of basis */ 1569 qr_needed = PETSC_FALSE; 1570 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1571 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1572 if (PetscBTLookup(change_basis,i)) { 1573 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1574 j = 0; 1575 for (k=0;k<size_of_constraint;k++) { 1576 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1577 j++; 1578 } 1579 } 1580 /* found more than one primal dof on the cc */ 1581 if (j > 1) { 1582 PetscBTSet(qr_needed_idx,i); 1583 qr_needed = PETSC_TRUE; 1584 } 1585 } 1586 } 1587 /* free workspace */ 1588 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1589 /* permute indices in order to have a sorted set of vertices */ 1590 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1591 1592 /* nonzero structure of constraint matrix */ 1593 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1594 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1595 j=total_primal_vertices; 1596 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1597 if (!PetscBTLookup(change_basis,i)) { 1598 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1599 j++; 1600 } 1601 } 1602 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1603 ierr = PetscFree(nnz);CHKERRQ(ierr); 1604 /* set values in constraint matrix */ 1605 for (i=0;i<total_primal_vertices;i++) { 1606 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1607 } 1608 total_counts = total_primal_vertices; 1609 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1610 if (!PetscBTLookup(change_basis,i)) { 1611 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1612 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1613 total_counts++; 1614 } 1615 } 1616 /* assembling */ 1617 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1618 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1619 /* 1620 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1621 */ 1622 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1623 if (pcbddc->use_change_of_basis) { 1624 /* dual and primal dofs on a single cc */ 1625 PetscInt dual_dofs,primal_dofs; 1626 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1627 PetscInt primal_counter; 1628 /* working stuff for GEQRF */ 1629 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1630 PetscBLASInt lqr_work; 1631 /* working stuff for UNGQR */ 1632 PetscScalar *gqr_work,lgqr_work_t; 1633 PetscBLASInt lgqr_work; 1634 /* working stuff for TRTRS */ 1635 PetscScalar *trs_rhs; 1636 PetscBLASInt Blas_NRHS; 1637 /* pointers for values insertion into change of basis matrix */ 1638 PetscInt *start_rows,*start_cols; 1639 PetscScalar *start_vals; 1640 /* working stuff for values insertion */ 1641 PetscBT is_primal; 1642 1643 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1644 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1645 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1646 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1647 /* work arrays */ 1648 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1649 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1650 /* nonzeros per row */ 1651 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1652 if (PetscBTLookup(change_basis,i)) { 1653 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1654 if (PetscBTLookup(qr_needed_idx,i)) { 1655 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1656 } else { 1657 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1658 /* get local primal index on the cc */ 1659 j = 0; 1660 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1661 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1662 } 1663 } 1664 } 1665 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1666 ierr = PetscFree(nnz);CHKERRQ(ierr); 1667 /* Set initial identity in the matrix */ 1668 for (i=0;i<pcis->n_B;i++) { 1669 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1670 } 1671 1672 if (pcbddc->dbg_flag) { 1673 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1674 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1675 } 1676 1677 1678 /* Now we loop on the constraints which need a change of basis */ 1679 /* 1680 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1681 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1682 1683 Basic blocks of change of basis matrix T computed 1684 1685 - Using the following block transformation if there is only a primal dof on the cc 1686 (in the example, primal dof is the last one of the edge in LOCAL ordering 1687 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1688 | 1 0 ... 0 1 | 1689 | 0 1 ... 0 1 | 1690 | ... | 1691 | 0 ... 1 1 | 1692 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1693 1694 - via QR decomposition of constraints otherwise 1695 */ 1696 if (qr_needed) { 1697 /* space to store Q */ 1698 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1699 /* first we issue queries for optimal work */ 1700 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1701 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1702 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1703 lqr_work = -1; 1704 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1705 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1706 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1707 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1708 lgqr_work = -1; 1709 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1710 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1711 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1712 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1713 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1714 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1715 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1716 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1717 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1718 /* array to store scaling factors for reflectors */ 1719 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1720 /* array to store rhs and solution of triangular solver */ 1721 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1722 /* allocating workspace for check */ 1723 if (pcbddc->dbg_flag) { 1724 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1725 } 1726 } 1727 /* array to store whether a node is primal or not */ 1728 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1729 for (i=0;i<total_primal_vertices;i++) { 1730 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1731 } 1732 1733 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1734 /* -> using implicit ordering contained in temp_indices data */ 1735 total_counts = pcbddc->n_vertices; 1736 primal_counter = total_counts; 1737 while (total_counts<pcbddc->local_primal_size) { 1738 primal_dofs = 1; 1739 if (PetscBTLookup(change_basis,total_counts)) { 1740 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1741 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1742 primal_dofs++; 1743 } 1744 /* get constraint info */ 1745 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1746 dual_dofs = size_of_constraint-primal_dofs; 1747 1748 if (pcbddc->dbg_flag) { 1749 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1750 } 1751 1752 if (primal_dofs > 1) { /* QR */ 1753 1754 /* copy quadrature constraints for change of basis check */ 1755 if (pcbddc->dbg_flag) { 1756 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1757 } 1758 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1759 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1760 1761 /* compute QR decomposition of constraints */ 1762 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1763 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1764 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1765 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1766 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1767 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1768 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1769 1770 /* explictly compute R^-T */ 1771 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1772 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1773 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1774 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1775 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1776 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1777 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1778 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1779 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1780 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1781 1782 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1783 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1784 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1785 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1786 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1787 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1788 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1789 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1790 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1791 1792 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1793 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1794 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1795 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1796 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1797 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1798 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1799 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1800 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1801 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1802 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1803 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1804 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1805 1806 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1807 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1808 /* insert cols for primal dofs */ 1809 for (j=0;j<primal_dofs;j++) { 1810 start_vals = &qr_basis[j*size_of_constraint]; 1811 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1812 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1813 } 1814 /* insert cols for dual dofs */ 1815 for (j=0,k=0;j<dual_dofs;k++) { 1816 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1817 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1818 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1819 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1820 j++; 1821 } 1822 } 1823 1824 /* check change of basis */ 1825 if (pcbddc->dbg_flag) { 1826 PetscInt ii,jj; 1827 PetscBool valid_qr=PETSC_TRUE; 1828 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1829 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1830 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1831 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1832 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1833 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1834 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1835 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1836 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1837 for (jj=0;jj<size_of_constraint;jj++) { 1838 for (ii=0;ii<primal_dofs;ii++) { 1839 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1840 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1841 } 1842 } 1843 if (!valid_qr) { 1844 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1845 for (jj=0;jj<size_of_constraint;jj++) { 1846 for (ii=0;ii<primal_dofs;ii++) { 1847 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1848 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1849 } 1850 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1851 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1852 } 1853 } 1854 } 1855 } else { 1856 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1857 } 1858 } 1859 } else { /* simple transformation block */ 1860 PetscInt row,col; 1861 PetscScalar val; 1862 for (j=0;j<size_of_constraint;j++) { 1863 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1864 if (!PetscBTLookup(is_primal,row)) { 1865 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1866 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1867 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1868 } else { 1869 for (k=0;k<size_of_constraint;k++) { 1870 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1871 if (row != col) { 1872 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1873 } else { 1874 val = 1.0; 1875 } 1876 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1877 } 1878 } 1879 } 1880 } 1881 /* increment primal counter */ 1882 primal_counter += primal_dofs; 1883 } else { 1884 if (pcbddc->dbg_flag) { 1885 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1886 } 1887 } 1888 /* increment constraint counter total_counts */ 1889 total_counts += primal_dofs; 1890 } 1891 1892 /* free workspace */ 1893 if (qr_needed) { 1894 if (pcbddc->dbg_flag) { 1895 ierr = PetscFree(work);CHKERRQ(ierr); 1896 } 1897 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1898 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1899 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1900 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1901 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1902 } 1903 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1904 /* assembling */ 1905 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1906 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1907 /* 1908 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1909 */ 1910 } 1911 1912 /* flush dbg viewer */ 1913 if (pcbddc->dbg_flag) { 1914 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1915 } 1916 1917 /* free workspace */ 1918 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1919 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 1920 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1921 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1922 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1923 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1924 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1925 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1926 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1927 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1928 PetscFunctionReturn(0); 1929 } 1930 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1933 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1934 { 1935 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1936 PC_IS *pcis = (PC_IS*)pc->data; 1937 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1938 PetscInt bs,ierr,i,vertex_size; 1939 PetscViewer viewer=pcbddc->dbg_viewer; 1940 1941 PetscFunctionBegin; 1942 /* Init local Graph struct */ 1943 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1944 1945 /* Check validity of the csr graph passed in by the user */ 1946 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1947 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1948 } 1949 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1950 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1951 Mat mat_adj; 1952 const PetscInt *xadj,*adjncy; 1953 PetscBool flg_row=PETSC_TRUE; 1954 1955 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1956 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1957 if (!flg_row) { 1958 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1959 } 1960 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1961 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1962 if (!flg_row) { 1963 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1964 } 1965 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1966 } 1967 1968 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1969 vertex_size = 1; 1970 if (!pcbddc->n_ISForDofs) { 1971 IS *custom_ISForDofs; 1972 1973 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1974 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1975 for (i=0;i<bs;i++) { 1976 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1977 } 1978 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1979 /* remove my references to IS objects */ 1980 for (i=0;i<bs;i++) { 1981 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1982 } 1983 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1984 } else { /* mat block size as vertex size (used for elasticity) */ 1985 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1986 } 1987 1988 /* Setup of Graph */ 1989 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1990 1991 /* Graph's connected components analysis */ 1992 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1993 1994 /* print some info to stdout */ 1995 if (pcbddc->dbg_flag) { 1996 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1997 } 1998 PetscFunctionReturn(0); 1999 } 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2003 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2004 { 2005 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2006 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2007 PetscErrorCode ierr; 2008 2009 PetscFunctionBegin; 2010 n = 0; 2011 vertices = 0; 2012 if (pcbddc->ConstraintMatrix) { 2013 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2014 for (i=0;i<local_primal_size;i++) { 2015 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2016 if (size_of_constraint == 1) n++; 2017 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2018 } 2019 if (vertices_idx) { 2020 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2021 n = 0; 2022 for (i=0;i<local_primal_size;i++) { 2023 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2024 if (size_of_constraint == 1) { 2025 vertices[n++]=row_cmat_indices[0]; 2026 } 2027 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2028 } 2029 } 2030 } 2031 *n_vertices = n; 2032 if (vertices_idx) *vertices_idx = vertices; 2033 PetscFunctionReturn(0); 2034 } 2035 2036 #undef __FUNCT__ 2037 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2038 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2039 { 2040 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2041 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2042 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2043 PetscBT touched; 2044 PetscErrorCode ierr; 2045 2046 /* This function assumes that the number of local constraints per connected component 2047 is not greater than the number of nodes defined for the connected component 2048 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2049 PetscFunctionBegin; 2050 n = 0; 2051 constraints_index = 0; 2052 if (pcbddc->ConstraintMatrix) { 2053 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2054 max_size_of_constraint = 0; 2055 for (i=0;i<local_primal_size;i++) { 2056 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2057 if (size_of_constraint > 1) { 2058 n++; 2059 } 2060 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2061 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2062 } 2063 if (constraints_idx) { 2064 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2065 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2066 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2067 n = 0; 2068 for (i=0;i<local_primal_size;i++) { 2069 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2070 if (size_of_constraint > 1) { 2071 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2072 /* find first untouched local node */ 2073 j = 0; 2074 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2075 min_index = row_cmat_global_indices[j]; 2076 min_loc = j; 2077 /* search the minimum among nodes not yet touched on the connected component 2078 since there can be more than one constraint on a single cc */ 2079 for (j=1;j<size_of_constraint;j++) { 2080 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2081 min_index = row_cmat_global_indices[j]; 2082 min_loc = j; 2083 } 2084 } 2085 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2086 constraints_index[n++] = row_cmat_indices[min_loc]; 2087 } 2088 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2089 } 2090 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2091 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2092 } 2093 } 2094 *n_constraints = n; 2095 if (constraints_idx) *constraints_idx = constraints_index; 2096 PetscFunctionReturn(0); 2097 } 2098 2099 /* the next two functions has been adapted from pcis.c */ 2100 #undef __FUNCT__ 2101 #define __FUNCT__ "PCBDDCApplySchur" 2102 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2103 { 2104 PetscErrorCode ierr; 2105 PC_IS *pcis = (PC_IS*)(pc->data); 2106 2107 PetscFunctionBegin; 2108 if (!vec2_B) { vec2_B = v; } 2109 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2110 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2111 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2112 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2113 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2114 PetscFunctionReturn(0); 2115 } 2116 2117 #undef __FUNCT__ 2118 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2119 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2120 { 2121 PetscErrorCode ierr; 2122 PC_IS *pcis = (PC_IS*)(pc->data); 2123 2124 PetscFunctionBegin; 2125 if (!vec2_B) { vec2_B = v; } 2126 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2127 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2128 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2129 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2130 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2131 PetscFunctionReturn(0); 2132 } 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "PCBDDCSubsetNumbering" 2136 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2137 { 2138 Vec local_vec,global_vec; 2139 IS seqis,paris; 2140 VecScatter scatter_ctx; 2141 PetscScalar *array; 2142 PetscInt *temp_global_dofs; 2143 PetscScalar globalsum; 2144 PetscInt i,j,s; 2145 PetscInt nlocals,first_index,old_index,max_local; 2146 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2147 PetscMPIInt *dof_sizes,*dof_displs; 2148 PetscBool first_found; 2149 PetscErrorCode ierr; 2150 2151 PetscFunctionBegin; 2152 /* mpi buffers */ 2153 MPI_Comm_size(comm,&size_prec_comm); 2154 MPI_Comm_rank(comm,&rank_prec_comm); 2155 j = ( !rank_prec_comm ? size_prec_comm : 0); 2156 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2157 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2158 /* get maximum size of subset */ 2159 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2160 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2161 max_local = 0; 2162 if (n_local_dofs) { 2163 max_local = temp_global_dofs[0]; 2164 for (i=1;i<n_local_dofs;i++) { 2165 if (max_local < temp_global_dofs[i] ) { 2166 max_local = temp_global_dofs[i]; 2167 } 2168 } 2169 } 2170 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2171 max_global++; 2172 max_local = 0; 2173 if (n_local_dofs) { 2174 max_local = local_dofs[0]; 2175 for (i=1;i<n_local_dofs;i++) { 2176 if (max_local < local_dofs[i] ) { 2177 max_local = local_dofs[i]; 2178 } 2179 } 2180 } 2181 max_local++; 2182 /* allocate workspace */ 2183 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2184 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2185 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2186 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2187 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2188 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2189 /* create scatter */ 2190 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2191 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2192 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2193 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2194 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2195 /* init array */ 2196 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2197 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2198 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2199 if (local_dofs_mult) { 2200 for (i=0;i<n_local_dofs;i++) { 2201 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2202 } 2203 } else { 2204 for (i=0;i<n_local_dofs;i++) { 2205 array[local_dofs[i]]=1.0; 2206 } 2207 } 2208 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2209 /* scatter into global vec and get total number of global dofs */ 2210 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2211 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2212 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2213 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2214 /* Fill global_vec with cumulative function for global numbering */ 2215 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2216 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2217 nlocals = 0; 2218 first_index = -1; 2219 first_found = PETSC_FALSE; 2220 for (i=0;i<s;i++) { 2221 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2222 first_found = PETSC_TRUE; 2223 first_index = i; 2224 } 2225 nlocals += (PetscInt)PetscRealPart(array[i]); 2226 } 2227 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2228 if (!rank_prec_comm) { 2229 dof_displs[0]=0; 2230 for (i=1;i<size_prec_comm;i++) { 2231 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2232 } 2233 } 2234 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2235 if (first_found) { 2236 array[first_index] += (PetscScalar)nlocals; 2237 old_index = first_index; 2238 for (i=first_index+1;i<s;i++) { 2239 if (PetscRealPart(array[i]) > 0.0) { 2240 array[i] += array[old_index]; 2241 old_index = i; 2242 } 2243 } 2244 } 2245 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2246 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2247 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2248 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2249 /* get global ordering of local dofs */ 2250 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2251 if (local_dofs_mult) { 2252 for (i=0;i<n_local_dofs;i++) { 2253 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2254 } 2255 } else { 2256 for (i=0;i<n_local_dofs;i++) { 2257 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2258 } 2259 } 2260 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2261 /* free workspace */ 2262 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2263 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2264 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2265 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2266 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2267 /* return pointer to global ordering of local dofs */ 2268 *global_numbering_subset = temp_global_dofs; 2269 PetscFunctionReturn(0); 2270 } 2271 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2274 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2275 { 2276 PetscInt i,j; 2277 PetscScalar *alphas; 2278 PetscErrorCode ierr; 2279 2280 PetscFunctionBegin; 2281 /* this implements stabilized Gram-Schmidt */ 2282 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2283 for (i=0;i<n;i++) { 2284 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2285 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2286 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2287 } 2288 ierr = PetscFree(alphas);CHKERRQ(ierr); 2289 PetscFunctionReturn(0); 2290 } 2291 2292 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatConvert_IS_AIJ" 2295 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2296 { 2297 Mat new_mat; 2298 Mat_IS *matis = (Mat_IS*)(mat->data); 2299 /* info on mat */ 2300 PetscInt bs,rows,cols; 2301 PetscInt lrows,lcols; 2302 PetscInt local_rows,local_cols; 2303 PetscMPIInt nsubdomains; 2304 /* preallocation */ 2305 Vec vec_dnz,vec_onz; 2306 PetscScalar *my_dnz,*my_onz,*array; 2307 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2308 PetscInt index_row,index_col,owner; 2309 PetscInt *local_indices,*global_indices; 2310 /* work */ 2311 PetscInt i,j; 2312 PetscErrorCode ierr; 2313 2314 PetscFunctionBegin; 2315 /* CHECKS */ 2316 /* get info from mat */ 2317 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2318 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2319 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2320 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2321 2322 /* MAT_INITIAL_MATRIX */ 2323 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2324 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2325 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2326 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2327 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2328 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2329 2330 /* 2331 preallocation 2332 */ 2333 2334 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2335 /* 2336 Some vectors are needed to sum up properly on shared interface dofs. 2337 Note that preallocation is not exact, since it overestimates nonzeros 2338 */ 2339 /* 2340 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2341 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2342 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2343 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2344 */ 2345 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2346 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2347 /* All processes need to compute entire row ownership */ 2348 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2349 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2350 for (i=0;i<nsubdomains;i++) { 2351 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2352 row_ownership[j]=i; 2353 } 2354 } 2355 /* map indices of local mat to global values */ 2356 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2357 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2358 for (i=0;i<local_rows;i++) local_indices[i]=i; 2359 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2360 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2361 2362 /* 2363 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2364 then, they will be summed up properly. This way, preallocation is always sufficient 2365 */ 2366 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2367 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2368 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2369 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2370 for (i=0;i<local_rows;i++) { 2371 index_row = global_indices[i]; 2372 for (j=i;j<local_rows;j++) { 2373 owner = row_ownership[index_row]; 2374 index_col = global_indices[j]; 2375 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2376 my_dnz[i] += 1.0; 2377 } else { /* offdiag block */ 2378 my_onz[i] += 1.0; 2379 } 2380 /* same as before, interchanging rows and cols */ 2381 if (i != j) { 2382 owner = row_ownership[index_col]; 2383 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2384 my_dnz[j] += 1.0; 2385 } else { 2386 my_onz[j] += 1.0; 2387 } 2388 } 2389 } 2390 } 2391 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2392 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2393 if (local_rows) { /* multilevel guard */ 2394 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2395 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2396 } 2397 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2398 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2399 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2400 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2401 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2402 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2403 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2404 2405 /* set computed preallocation in dnz and onz */ 2406 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2407 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2408 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2409 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2410 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2411 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2412 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2413 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2414 2415 /* Resize preallocation if overestimated */ 2416 for (i=0;i<lrows;i++) { 2417 dnz[i] = PetscMin(dnz[i],lcols); 2418 onz[i] = PetscMin(onz[i],cols-lcols); 2419 } 2420 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2421 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2422 2423 /* 2424 Set values. Very Basic. 2425 */ 2426 for (i=0;i<local_rows;i++) { 2427 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2428 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2429 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2430 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2431 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2432 } 2433 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2434 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2435 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2436 2437 /* get back mat */ 2438 *M = new_mat; 2439 PetscFunctionReturn(0); 2440 } 2441 2442 #undef __FUNCT__ 2443 #define __FUNCT__ "MatISSubassemble_Private" 2444 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2445 { 2446 Mat subdomain_adj; 2447 IS new_ranks,ranks_send_to; 2448 MatPartitioning partitioner; 2449 Mat_IS *matis; 2450 PetscInt n_neighs,*neighs,*n_shared,**shared; 2451 PetscInt prank; 2452 PetscMPIInt size,rank,color; 2453 PetscInt *xadj,*adjncy,*oldranks; 2454 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2455 PetscInt i,j,n_subdomains,local_size,threshold=0; 2456 PetscErrorCode ierr; 2457 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2458 PetscSubcomm subcomm; 2459 2460 PetscFunctionBegin; 2461 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2462 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2463 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2464 2465 /* Get info on mapping */ 2466 matis = (Mat_IS*)(mat->data); 2467 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2468 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2469 2470 /* build local CSR graph of subdomains' connectivity */ 2471 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2472 xadj[0] = 0; 2473 xadj[1] = PetscMax(n_neighs-1,0); 2474 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2475 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2476 2477 if (threshold) { 2478 PetscInt* count,min_threshold; 2479 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2480 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2481 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2482 for (j=0;j<n_shared[i];j++) { 2483 count[shared[i][j]] += 1; 2484 } 2485 } 2486 /* adapt threshold since we dont want to lose significant connections */ 2487 min_threshold = n_neighs; 2488 for (i=1;i<n_neighs;i++) { 2489 for (j=0;j<n_shared[i];j++) { 2490 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2491 } 2492 } 2493 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2494 threshold = PetscMax(min_threshold+1,threshold); 2495 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2496 xadj[1] = 0; 2497 for (i=1;i<n_neighs;i++) { 2498 for (j=0;j<n_shared[i];j++) { 2499 if (count[shared[i][j]] < threshold) { 2500 adjncy[xadj[1]] = neighs[i]; 2501 adjncy_wgt[xadj[1]] = n_shared[i]; 2502 xadj[1]++; 2503 break; 2504 } 2505 } 2506 } 2507 ierr = PetscFree(count);CHKERRQ(ierr); 2508 } else { 2509 if (xadj[1]) { 2510 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2511 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2512 } 2513 } 2514 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2515 if (use_square) { 2516 for (i=0;i<xadj[1];i++) { 2517 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2518 } 2519 } 2520 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2521 2522 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2523 2524 /* 2525 Restrict work on active processes only. 2526 */ 2527 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2528 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2529 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2530 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2531 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2532 if (color) { 2533 ierr = PetscFree(xadj);CHKERRQ(ierr); 2534 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2535 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2536 } else { 2537 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2538 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2539 prank = rank; 2540 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2541 for (i=0;i<size;i++) { 2542 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2543 } 2544 for (i=0;i<xadj[1];i++) { 2545 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2546 } 2547 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2548 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2549 n_subdomains = (PetscInt)size; 2550 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2551 2552 /* Partition */ 2553 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2554 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2555 if (use_vwgt) { 2556 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2557 v_wgt[0] = local_size; 2558 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2559 } 2560 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2561 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2562 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2563 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2564 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2565 2566 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2567 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2568 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2569 /* clean up */ 2570 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2571 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2572 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2573 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2574 } 2575 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2576 2577 /* assemble parallel IS for sends */ 2578 i = 1; 2579 if (color) i=0; 2580 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2581 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2582 2583 /* get back IS */ 2584 *is_sends = ranks_send_to; 2585 PetscFunctionReturn(0); 2586 } 2587 2588 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2589 2590 #undef __FUNCT__ 2591 #define __FUNCT__ "MatISSubassemble" 2592 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2593 { 2594 Mat local_mat,new_mat; 2595 Mat_IS *matis; 2596 IS is_sends_internal; 2597 PetscInt rows,cols; 2598 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2599 PetscBool ismatis,isdense; 2600 ISLocalToGlobalMapping l2gmap; 2601 PetscInt* l2gmap_indices; 2602 const PetscInt* is_indices; 2603 MatType new_local_type; 2604 MatTypePrivate new_local_type_private; 2605 /* buffers */ 2606 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2607 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2608 /* MPI */ 2609 MPI_Comm comm; 2610 PetscMPIInt n_sends,n_recvs,commsize; 2611 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2612 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2613 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2614 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2615 PetscErrorCode ierr; 2616 2617 PetscFunctionBegin; 2618 /* checks */ 2619 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2620 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2621 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2622 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2623 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2624 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2625 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2626 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2627 PetscValidLogicalCollectiveInt(mat,bs,0); 2628 /* prepare IS for sending if not provided */ 2629 if (!is_sends) { 2630 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2631 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2632 } else { 2633 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2634 is_sends_internal = is_sends; 2635 } 2636 2637 /* get pointer of MATIS data */ 2638 matis = (Mat_IS*)mat->data; 2639 2640 /* get comm */ 2641 comm = PetscObjectComm((PetscObject)mat); 2642 2643 /* compute number of sends */ 2644 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2645 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2646 2647 /* compute number of receives */ 2648 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2649 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2650 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2651 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2652 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2653 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2654 ierr = PetscFree(iflags);CHKERRQ(ierr); 2655 2656 /* prepare send/receive buffers */ 2657 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2658 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2659 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2660 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2661 2662 /* Get data from local mat */ 2663 if (!isdense) { 2664 /* TODO: See below some guidelines on how to prepare the local buffers */ 2665 /* 2666 send_buffer_vals should contain the raw values of the local matrix 2667 send_buffer_idxs should contain: 2668 - MatType_PRIVATE type 2669 - PetscInt size_of_l2gmap 2670 - PetscInt global_row_indices[size_of_l2gmap] 2671 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2672 */ 2673 } else { 2674 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2675 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2676 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2677 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2678 send_buffer_idxs[1] = i; 2679 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2680 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2681 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2682 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2683 for (i=0;i<n_sends;i++) { 2684 ilengths_vals[is_indices[i]] = len*len; 2685 ilengths_idxs[is_indices[i]] = len+2; 2686 } 2687 } 2688 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2689 buf_size_idxs = 0; 2690 buf_size_vals = 0; 2691 for (i=0;i<n_recvs;i++) { 2692 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2693 buf_size_vals += (PetscInt)olengths_vals[i]; 2694 } 2695 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2696 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2697 2698 /* get new tags for clean communications */ 2699 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2700 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2701 2702 /* allocate for requests */ 2703 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2704 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2705 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2706 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2707 2708 /* communications */ 2709 ptr_idxs = recv_buffer_idxs; 2710 ptr_vals = recv_buffer_vals; 2711 for (i=0;i<n_recvs;i++) { 2712 source_dest = onodes[i]; 2713 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2714 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2715 ptr_idxs += olengths_idxs[i]; 2716 ptr_vals += olengths_vals[i]; 2717 } 2718 for (i=0;i<n_sends;i++) { 2719 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2720 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2721 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2722 } 2723 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2724 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2725 2726 /* assemble new l2g map */ 2727 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2728 ptr_idxs = recv_buffer_idxs; 2729 buf_size_idxs = 0; 2730 for (i=0;i<n_recvs;i++) { 2731 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2732 ptr_idxs += olengths_idxs[i]; 2733 } 2734 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2735 ptr_idxs = recv_buffer_idxs; 2736 buf_size_idxs = 0; 2737 for (i=0;i<n_recvs;i++) { 2738 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2739 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2740 ptr_idxs += olengths_idxs[i]; 2741 } 2742 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2743 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2744 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2745 2746 /* infer new local matrix type from received local matrices type */ 2747 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2748 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2749 new_local_type_private = MATAIJ_PRIVATE; 2750 new_local_type = MATSEQAIJ; 2751 if (n_recvs) { 2752 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2753 ptr_idxs = recv_buffer_idxs; 2754 for (i=0;i<n_recvs;i++) { 2755 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2756 new_local_type_private = MATAIJ_PRIVATE; 2757 break; 2758 } 2759 ptr_idxs += olengths_idxs[i]; 2760 } 2761 switch (new_local_type_private) { 2762 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2763 new_local_type = MATSEQAIJ; 2764 bs = 1; 2765 break; 2766 case MATAIJ_PRIVATE: 2767 new_local_type = MATSEQAIJ; 2768 bs = 1; 2769 break; 2770 case MATBAIJ_PRIVATE: 2771 new_local_type = MATSEQBAIJ; 2772 break; 2773 case MATSBAIJ_PRIVATE: 2774 new_local_type = MATSEQSBAIJ; 2775 break; 2776 default: 2777 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2778 break; 2779 } 2780 } 2781 2782 /* create MATIS object */ 2783 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2784 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2785 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2786 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2787 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2788 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2789 2790 /* set values */ 2791 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2792 ptr_vals = recv_buffer_vals; 2793 ptr_idxs = recv_buffer_idxs; 2794 for (i=0;i<n_recvs;i++) { 2795 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2796 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2797 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2798 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2799 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2800 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2801 } 2802 ptr_idxs += olengths_idxs[i]; 2803 ptr_vals += olengths_vals[i]; 2804 } 2805 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2806 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2807 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2808 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2809 2810 { /* check */ 2811 Vec lvec,rvec; 2812 PetscReal infty_error; 2813 2814 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2815 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2816 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2817 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2818 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2819 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2820 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2821 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2822 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2823 } 2824 2825 /* free workspace */ 2826 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2827 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2828 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2829 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2830 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2831 if (isdense) { 2832 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2833 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2834 } else { 2835 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2836 } 2837 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2838 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2839 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2840 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2841 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2842 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2843 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2844 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2845 ierr = PetscFree(onodes);CHKERRQ(ierr); 2846 /* get back new mat */ 2847 *mat_n = new_mat; 2848 PetscFunctionReturn(0); 2849 } 2850 2851 #undef __FUNCT__ 2852 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2853 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2854 { 2855 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2856 PC_IS *pcis = (PC_IS*)pc->data; 2857 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2858 MatNullSpace CoarseNullSpace=NULL; 2859 ISLocalToGlobalMapping coarse_islg; 2860 IS coarse_is; 2861 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2862 PetscInt im_active=-1,active_procs=-1; 2863 PC pc_temp; 2864 PCType coarse_pc_type; 2865 KSPType coarse_ksp_type; 2866 PetscBool multilevel_requested,multilevel_allowed; 2867 PetscBool setsym,issym,isbddc,isnn; 2868 MatStructure matstruct; 2869 PetscErrorCode ierr; 2870 2871 PetscFunctionBegin; 2872 /* Assign global numbering to coarse dofs */ 2873 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2874 2875 /* infer some info from user */ 2876 issym = PETSC_FALSE; 2877 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2878 multilevel_allowed = PETSC_FALSE; 2879 multilevel_requested = PETSC_FALSE; 2880 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2881 if (multilevel_requested) { 2882 /* count "active processes" */ 2883 im_active = !!(pcis->n); 2884 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2885 if (active_procs/pcbddc->coarsening_ratio < 2) { 2886 multilevel_allowed = PETSC_FALSE; 2887 } else { 2888 multilevel_allowed = PETSC_TRUE; 2889 } 2890 } 2891 2892 /* set defaults for coarse KSP and PC */ 2893 if (multilevel_allowed) { 2894 if (issym) { 2895 coarse_ksp_type = KSPRICHARDSON; 2896 } else { 2897 coarse_ksp_type = KSPCHEBYSHEV; 2898 } 2899 coarse_pc_type = PCBDDC; 2900 } else { 2901 coarse_ksp_type = KSPPREONLY; 2902 coarse_pc_type = PCREDUNDANT; 2903 } 2904 2905 /* create the coarse KSP object only once with defaults */ 2906 if (!pcbddc->coarse_ksp) { 2907 char prefix[256],str_level[3]; 2908 size_t len; 2909 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2910 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2911 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2912 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2913 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2914 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2915 /* prefix */ 2916 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 2917 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2918 if (!pcbddc->current_level) { 2919 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2920 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 2921 } else { 2922 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2923 if (pcbddc->current_level>1) len -= 2; 2924 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 2925 *(prefix+len)='\0'; 2926 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 2927 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 2928 } 2929 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 2930 } 2931 /* allow user customization */ 2932 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 2933 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2934 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 2935 2936 /* get some info after set from options */ 2937 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2938 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 2939 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 2940 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 2941 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2942 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2943 isbddc = PETSC_FALSE; 2944 } 2945 2946 /* propagate BDDC info to the next level */ 2947 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 2948 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2949 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 2950 2951 /* creates temporary MATIS object for coarse matrix */ 2952 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 2953 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 2954 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 2955 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 2956 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 2957 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2958 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 2960 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 2961 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 2962 2963 /* assemble coarse matrix */ 2964 if (isbddc || isnn) { 2965 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 2966 } else { 2967 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 2968 } 2969 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 2970 2971 /* create local to global scatters for coarse problem */ 2972 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2973 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2974 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 2975 2976 /* propagate symmetry info to coarse matrix */ 2977 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 2978 2979 /* Compute coarse null space (special handling by BDDC only) */ 2980 if (pcbddc->NullSpace) { 2981 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 2982 if (isbddc) { 2983 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 2984 } else { 2985 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 2986 } 2987 } 2988 2989 /* set operators */ 2990 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 2991 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 2992 2993 /* additional KSP customization */ 2994 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 2995 if (max_it < 5) { 2996 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 2997 } 2998 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 2999 3000 3001 /* print some info if requested */ 3002 if (pcbddc->dbg_flag) { 3003 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3004 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3005 if (!multilevel_allowed) { 3006 if (multilevel_requested) { 3007 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3008 } else if (pcbddc->max_levels) { 3009 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3010 } 3011 } 3012 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3013 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3014 } 3015 3016 /* setup coarse ksp */ 3017 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3018 if (pcbddc->dbg_flag) { 3019 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3020 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3021 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3022 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3023 } 3024 3025 /* Check coarse problem if requested */ 3026 if (pcbddc->dbg_flag) { 3027 KSP check_ksp; 3028 KSPType check_ksp_type; 3029 PC check_pc; 3030 Vec check_vec; 3031 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3032 PetscInt its; 3033 PetscBool ispreonly,compute; 3034 3035 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3036 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3037 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3038 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3039 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3040 if (ispreonly) { 3041 check_ksp_type = KSPPREONLY; 3042 compute = PETSC_FALSE; 3043 } else { 3044 if (issym) check_ksp_type = KSPCG; 3045 else check_ksp_type = KSPGMRES; 3046 compute = PETSC_TRUE; 3047 } 3048 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3049 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3050 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3051 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3052 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3053 /* create random vec */ 3054 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3055 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3056 if (CoarseNullSpace) { 3057 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3058 } 3059 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3060 /* solve coarse problem */ 3061 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3062 if (CoarseNullSpace) { 3063 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3064 } 3065 /* check coarse problem residual error */ 3066 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3067 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3068 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3069 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3070 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3071 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3072 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3073 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3074 /* get eigenvalue estimation if preonly has not been requested */ 3075 if (!ispreonly) { 3076 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3077 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3078 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3079 } 3080 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3081 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3082 } 3083 /* free memory */ 3084 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3085 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3086 PetscFunctionReturn(0); 3087 } 3088 3089 #undef __FUNCT__ 3090 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3091 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3092 { 3093 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3094 PC_IS* pcis = (PC_IS*)pc->data; 3095 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3096 PetscInt i,j,coarse_size; 3097 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3098 PetscErrorCode ierr; 3099 3100 PetscFunctionBegin; 3101 /* get indices in local ordering for vertices and constraints */ 3102 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3103 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3104 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3105 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3106 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3107 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3108 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3109 3110 /* Compute global number of coarse dofs */ 3111 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3112 3113 /* check numbering */ 3114 if (pcbddc->dbg_flag) { 3115 PetscScalar coarsesum,*array; 3116 3117 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3118 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3120 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3121 for (i=0;i<pcbddc->local_primal_size;i++) { 3122 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3123 } 3124 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3125 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3126 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3127 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3128 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3129 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3130 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3131 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3132 for (i=0;i<pcis->n;i++) { 3133 if (array[i] == 1.0) { 3134 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3135 } 3136 } 3137 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3138 for (i=0;i<pcis->n;i++) { 3139 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3140 } 3141 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3142 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3143 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3144 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3145 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3146 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3147 if (pcbddc->dbg_flag > 1) { 3148 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3149 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3150 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3151 for (i=0;i<pcbddc->local_primal_size;i++) { 3152 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]); 3153 } 3154 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3155 } 3156 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3157 } 3158 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3159 /* get back data */ 3160 *coarse_size_n = coarse_size; 3161 *local_primal_indices_n = local_primal_indices; 3162 PetscFunctionReturn(0); 3163 } 3164 3165