1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 pcbddc->n_ISForDofs = 0; 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "PCBDDCCreateWorkVectors" 111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 112 { 113 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 114 PC_IS *pcis = (PC_IS*)pc->data; 115 VecType impVecType; 116 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 117 PetscErrorCode ierr; 118 119 PetscFunctionBegin; 120 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 121 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 122 local_primal_size = n_constraints+n_vertices; 123 n_R = pcis->n-n_vertices; 124 /* local work vectors */ 125 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 126 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 127 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 128 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 129 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 130 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 131 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 132 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 133 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 134 if (n_constraints) { 135 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 136 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 137 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetUpCorrection" 144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 145 { 146 PetscErrorCode ierr; 147 /* pointers to pcis and pcbddc */ 148 PC_IS* pcis = (PC_IS*)pc->data; 149 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 150 /* submatrices of local problem */ 151 Mat A_RV,A_VR,A_VV; 152 /* working matrices */ 153 Mat M1,M2,M3,C_CR; 154 /* working vectors */ 155 Vec vec1_C,vec2_C,vec1_V,vec2_V; 156 /* additional working stuff */ 157 IS is_aux; 158 ISLocalToGlobalMapping BtoNmap; 159 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 160 const PetscScalar *array,*row_cmat_values; 161 const PetscInt *row_cmat_indices,*idx_R_local; 162 PetscInt *vertices,*idx_V_B,*auxindices; 163 PetscInt n_vertices,n_constraints,size_of_constraint; 164 PetscInt i,j,n_R,n_D,n_B; 165 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 166 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 167 MatType impMatType; 168 /* some shortcuts to scalars */ 169 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 170 /* for debugging purposes */ 171 PetscReal *coarsefunctions_errors,*constraints_errors; 172 173 PetscFunctionBegin; 174 /* get number of vertices and their local indices */ 175 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 176 n_constraints = pcbddc->local_primal_size-n_vertices; 177 /* Set Non-overlapping dimensions */ 178 n_B = pcis->n_B; n_D = pcis->n - n_B; 179 n_R = pcis->n-n_vertices; 180 181 /* Set types for local objects needed by BDDC precondtioner */ 182 impMatType = MATSEQDENSE; 183 184 /* Allocating some extra storage just to be safe */ 185 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 186 for (i=0;i<pcis->n;i++) auxindices[i]=i; 187 188 /* vertices in boundary numbering */ 189 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 190 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 191 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 193 if (i != n_vertices) { 194 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 195 } 196 197 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 198 if (n_constraints) { 199 /* work vectors */ 200 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 201 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 202 /* auxiliary matrices */ 203 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 204 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 205 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 206 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 207 208 /* Extract constraints on R nodes: C_{CR} */ 209 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 210 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 211 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 212 213 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 214 for (i=0;i<n_constraints;i++) { 215 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 216 /* Get row of constraint matrix in R numbering */ 217 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 218 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 219 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 220 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 221 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 222 /* Solve for row of constraint matrix in R numbering */ 223 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 224 /* Set values in local_auxmat2 */ 225 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 226 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 227 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 228 } 229 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 230 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 231 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 232 233 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 234 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 235 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 236 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 237 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 238 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 239 ierr = MatSetUp(M1);CHKERRQ(ierr); 240 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 241 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 242 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 243 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 244 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 245 ierr = MatDestroy(&M2);CHKERRQ(ierr); 246 ierr = MatDestroy(&M3);CHKERRQ(ierr); 247 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 248 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 249 } 250 251 /* Get submatrices from subdomain matrix */ 252 if (n_vertices) { 253 PetscInt ibs,mbs; 254 PetscBool issbaij; 255 Mat newmat; 256 257 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 258 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 259 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 260 if (ibs != mbs) { /* need to convert to SEQAIJ */ 261 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 262 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 263 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 264 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 265 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 266 } else { 267 /* this is safe */ 268 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 269 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 270 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 271 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 272 /* which of the two approaches is faster? */ 273 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 274 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 275 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 276 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 277 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 278 } else { 279 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 280 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 281 } 282 } 283 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 284 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 285 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 286 } 287 288 /* Matrix of coarse basis functions (local) */ 289 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 290 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 291 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 292 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 293 if (pcbddc->switch_static || pcbddc->dbg_flag) { 294 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 295 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 296 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 297 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 298 } 299 300 if (pcbddc->dbg_flag) { 301 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 302 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 303 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 304 } 305 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 306 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 307 308 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 309 310 /* vertices */ 311 for (i=0;i<n_vertices;i++) { 312 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 313 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 314 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 315 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 316 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 317 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 318 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 319 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 320 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 321 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 322 if (n_constraints) { 323 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 324 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 325 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 326 } 327 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 328 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 329 330 /* Set values in coarse basis function and subdomain part of coarse_mat */ 331 /* coarse basis functions */ 332 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 333 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 334 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 335 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 336 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 337 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 338 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 339 if (pcbddc->switch_static || pcbddc->dbg_flag) { 340 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 341 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 342 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 343 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 344 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 345 } 346 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 347 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 348 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 350 if (n_constraints) { 351 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 352 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 353 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 354 } 355 356 /* check */ 357 if (pcbddc->dbg_flag) { 358 /* assemble subdomain vector on local nodes */ 359 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 360 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 361 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 362 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 363 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 364 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 365 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 366 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 367 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 368 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 369 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 370 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 371 if (n_constraints) { 372 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 373 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 374 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 375 } 376 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 378 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 379 /* check saddle point solution */ 380 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 381 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 382 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 383 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 384 /* shift by the identity matrix */ 385 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 386 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 387 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 388 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 389 } 390 } 391 392 /* constraints */ 393 for (i=0;i<n_constraints;i++) { 394 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 395 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 396 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 397 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 398 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 399 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 400 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 401 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 402 if (n_vertices) { 403 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 404 } 405 /* Set values in coarse basis function and subdomain part of coarse_mat */ 406 /* coarse basis functions */ 407 j = i+n_vertices; /* don't touch this! */ 408 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 409 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 410 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 411 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 412 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 414 if (pcbddc->switch_static || pcbddc->dbg_flag) { 415 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 416 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 417 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 418 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 420 } 421 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 422 if (n_vertices) { 423 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 424 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 426 } 427 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 428 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 430 431 if (pcbddc->dbg_flag) { 432 /* assemble subdomain vector on nodes */ 433 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 434 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 437 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 439 /* assemble subdomain vector of lagrange multipliers */ 440 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 441 if (n_vertices) { 442 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 443 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 444 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 445 } 446 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 447 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 448 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 449 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 451 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 452 /* check saddle point solution */ 453 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 454 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 456 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 457 /* shift by the identity matrix */ 458 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 459 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 460 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 461 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 462 } 463 } 464 /* call assembling routines for local coarse basis */ 465 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 if (pcbddc->switch_static || pcbddc->dbg_flag) { 468 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 469 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 470 } 471 472 /* compute other basis functions for non-symmetric problems */ 473 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 474 if (!setsym || (setsym && !issym)) { 475 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 476 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 477 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 478 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 479 if (pcbddc->switch_static || pcbddc->dbg_flag) { 480 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 481 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 482 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 483 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 484 } 485 for (i=0;i<pcbddc->local_primal_size;i++) { 486 if (n_constraints) { 487 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 488 for (j=0;j<n_constraints;j++) { 489 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 490 } 491 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 492 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 493 } 494 if (i<n_vertices) { 495 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 496 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 499 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 500 if (n_constraints) { 501 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 502 } 503 } else { 504 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 505 } 506 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 507 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 508 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 509 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 510 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 511 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 512 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 513 if (i<n_vertices) { 514 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 515 } 516 if (pcbddc->switch_static || pcbddc->dbg_flag) { 517 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 518 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 519 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 520 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 521 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 522 } 523 524 if (pcbddc->dbg_flag) { 525 /* assemble subdomain vector on nodes */ 526 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 527 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 528 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 529 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 530 if (i<n_vertices) { 531 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 532 } 533 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 534 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 535 /* assemble subdomain vector of lagrange multipliers */ 536 for (j=0;j<pcbddc->local_primal_size;j++) { 537 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 538 } 539 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 540 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 541 /* check saddle point solution */ 542 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 543 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 544 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 545 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 546 /* shift by the identity matrix */ 547 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 548 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 549 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 550 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 551 } 552 } 553 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 if (pcbddc->switch_static || pcbddc->dbg_flag) { 556 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 557 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 } 559 } 560 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 561 /* Checking coarse_sub_mat and coarse basis functios */ 562 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 563 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 564 if (pcbddc->dbg_flag) { 565 Mat coarse_sub_mat; 566 Mat AUXMAT,TM1,TM2,TM3,TM4; 567 Mat coarse_phi_D,coarse_phi_B; 568 Mat coarse_psi_D,coarse_psi_B; 569 Mat A_II,A_BB,A_IB,A_BI; 570 MatType checkmattype=MATSEQAIJ; 571 PetscReal real_value; 572 573 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 574 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 575 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 576 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 577 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 578 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 579 if (pcbddc->coarse_psi_B) { 580 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 581 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 582 } 583 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 584 585 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 586 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 587 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 588 if (pcbddc->coarse_psi_B) { 589 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 590 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 591 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 592 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 593 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 594 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 595 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 596 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 597 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 598 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 599 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 600 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 601 } else { 602 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 603 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 604 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 605 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 606 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 607 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 608 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 609 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 610 } 611 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 612 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 613 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 614 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 615 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 616 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 621 for (i=0;i<pcbddc->local_primal_size;i++) { 622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 623 } 624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 625 for (i=0;i<pcbddc->local_primal_size;i++) { 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 627 } 628 if (pcbddc->coarse_psi_B) { 629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 630 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 632 } 633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 634 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 636 } 637 } 638 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 639 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 640 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 641 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 642 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 643 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 644 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 645 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 646 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 647 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 648 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 649 if (pcbddc->coarse_psi_B) { 650 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 651 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 652 } 653 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 654 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 655 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 656 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 657 } 658 /* free memory */ 659 if (n_vertices) { 660 ierr = PetscFree(vertices);CHKERRQ(ierr); 661 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 662 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 663 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 664 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 665 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 666 } 667 if (n_constraints) { 668 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 669 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 670 ierr = MatDestroy(&M1);CHKERRQ(ierr); 671 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 672 } 673 ierr = PetscFree(auxindices);CHKERRQ(ierr); 674 /* get back data */ 675 *coarse_submat_vals_n = coarse_submat_vals; 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 681 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 682 { 683 PC_IS* pcis = (PC_IS*)(pc->data); 684 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 685 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 686 PetscBool issbaij,isbaij; 687 /* manage repeated solves */ 688 MatReuse reuse; 689 MatStructure matstruct; 690 PetscErrorCode ierr; 691 692 PetscFunctionBegin; 693 /* get mat flags */ 694 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 695 reuse = MAT_INITIAL_MATRIX; 696 if (pc->setupcalled) { 697 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 698 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 699 if (matstruct == SAME_NONZERO_PATTERN) { 700 reuse = MAT_REUSE_MATRIX; 701 } else { 702 reuse = MAT_INITIAL_MATRIX; 703 } 704 } 705 if (reuse == MAT_INITIAL_MATRIX) { 706 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 707 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 708 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 709 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 710 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 711 } 712 713 /* transform local matrices if needed */ 714 if (pcbddc->use_change_of_basis) { 715 Mat change_mat_all; 716 PetscScalar *row_cmat_values; 717 PetscInt *row_cmat_indices; 718 PetscInt *nnz,*is_indices,*temp_indices; 719 PetscInt i,j,k,n_D,n_B; 720 721 /* Get Non-overlapping dimensions */ 722 n_B = pcis->n_B; 723 n_D = pcis->n-n_B; 724 725 /* compute nonzero structure of change of basis on all local nodes */ 726 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 727 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 728 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 729 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 730 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 731 k=1; 732 for (i=0;i<n_B;i++) { 733 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 734 nnz[is_indices[i]]=j; 735 if (k < j) k = j; 736 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 737 } 738 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 739 /* assemble change of basis matrix on the whole set of local dofs */ 740 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 741 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 742 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 743 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 744 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 745 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 746 for (i=0;i<n_D;i++) { 747 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 748 } 749 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 750 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 751 for (i=0;i<n_B;i++) { 752 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 753 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 754 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 755 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 756 } 757 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 758 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 759 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 760 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 761 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 762 if (!issbaij && !isbaij) { 763 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 764 } else { 765 Mat work_mat; 766 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 767 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 768 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 769 } 770 /* 771 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 772 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 773 */ 774 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 775 ierr = PetscFree(nnz);CHKERRQ(ierr); 776 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 777 } else { 778 /* without change of basis, the local matrix is unchanged */ 779 if (!pcbddc->local_mat) { 780 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 781 pcbddc->local_mat = matis->A; 782 } 783 } 784 785 /* get submatrices */ 786 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 787 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 788 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 789 if (!issbaij) { 790 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 791 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 792 } else { 793 Mat newmat; 794 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 795 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 796 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 797 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 798 } 799 PetscFunctionReturn(0); 800 } 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 804 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 805 { 806 PC_IS* pcis = (PC_IS*)(pc->data); 807 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 808 IS is_aux1,is_aux2; 809 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 810 PetscInt n_vertices,i,j,n_R,n_D,n_B; 811 PetscInt vbs,bs; 812 PetscBT bitmask; 813 PetscErrorCode ierr; 814 815 PetscFunctionBegin; 816 /* Set Non-overlapping dimensions */ 817 n_B = pcis->n_B; n_D = pcis->n - n_B; 818 /* get vertex indices from constraint matrix */ 819 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 820 821 /* create auxiliary bitmask */ 822 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 823 for (i=0;i<n_vertices;i++) { 824 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 825 } 826 827 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 828 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 829 for (i=0, n_R=0; i<pcis->n; i++) { 830 if (!PetscBTLookup(bitmask,i)) { 831 idx_R_local[n_R] = i; 832 n_R++; 833 } 834 } 835 836 /* Block code */ 837 vbs = 1; 838 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 839 if (bs>1 && !(n_vertices%bs)) { 840 PetscBool is_blocked = PETSC_TRUE; 841 PetscInt *vary; 842 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 843 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 844 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 845 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 846 for (i=0; i<n_vertices; i++) { 847 if (vary[i]!=0 && vary[i]!=bs) { 848 is_blocked = PETSC_FALSE; 849 break; 850 } 851 } 852 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 853 vbs = bs; 854 for (i=0;i<n_R/vbs;i++) { 855 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 856 } 857 } 858 ierr = PetscFree(vary);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(vertices);CHKERRQ(ierr); 861 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 862 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 863 864 /* print some info if requested */ 865 if (pcbddc->dbg_flag) { 866 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 867 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 868 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 869 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 870 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 871 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 872 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 873 } 874 875 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 876 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 877 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 878 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 879 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 880 for (i=0; i<n_D; i++) { 881 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 882 } 883 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 884 for (i=0, j=0; i<n_R; i++) { 885 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 886 aux_array1[j++] = i; 887 } 888 } 889 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 890 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 891 for (i=0, j=0; i<n_B; i++) { 892 if (!PetscBTLookup(bitmask,is_indices[i])) { 893 aux_array2[j++] = i; 894 } 895 } 896 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 897 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 898 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 899 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 900 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 901 902 if (pcbddc->switch_static || pcbddc->dbg_flag) { 903 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 904 for (i=0, j=0; i<n_R; i++) { 905 if (PetscBTLookup(bitmask,idx_R_local[i])) { 906 aux_array1[j++] = i; 907 } 908 } 909 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 910 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 911 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 912 } 913 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 914 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 915 PetscFunctionReturn(0); 916 } 917 918 919 #undef __FUNCT__ 920 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 921 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 922 { 923 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 924 PC_IS *pcis = (PC_IS*)pc->data; 925 PC pc_temp; 926 Mat A_RR; 927 Vec vec1,vec2,vec3; 928 MatStructure matstruct; 929 PetscScalar m_one = -1.0; 930 PetscReal value; 931 PetscInt n_D,n_R,ibs,mbs; 932 PetscBool use_exact,use_exact_reduced,issbaij; 933 PetscErrorCode ierr; 934 /* prefixes stuff */ 935 char dir_prefix[256],neu_prefix[256],str_level[3]; 936 size_t len; 937 938 PetscFunctionBegin; 939 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 940 941 /* compute prefixes */ 942 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 943 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 944 if (!pcbddc->current_level) { 945 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 946 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 947 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 948 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 949 } else { 950 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 951 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 952 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 953 len -= 15; /* remove "pc_bddc_coarse_" */ 954 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 955 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 956 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 957 *(dir_prefix+len)='\0'; 958 *(neu_prefix+len)='\0'; 959 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 960 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 961 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 962 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 963 } 964 965 /* DIRICHLET PROBLEM */ 966 /* Matrix for Dirichlet problem is pcis->A_II */ 967 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 968 if (!pcbddc->ksp_D) { /* create object if not yet build */ 969 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 970 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 971 /* default */ 972 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 973 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 974 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 975 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 976 if (issbaij) { 977 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 978 } else { 979 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 980 } 981 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 982 } 983 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 984 /* Allow user's customization */ 985 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 986 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 987 if (!n_D) { 988 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 989 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 990 } 991 /* Set Up KSP for Dirichlet problem of BDDC */ 992 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 993 /* set ksp_D into pcis data */ 994 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 995 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 996 pcis->ksp_D = pcbddc->ksp_D; 997 998 /* NEUMANN PROBLEM */ 999 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1000 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1001 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1002 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1003 if (ibs != mbs) { 1004 Mat newmat; 1005 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1006 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1007 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1008 } else { 1009 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1010 } 1011 if (!pcbddc->ksp_R) { /* create object if not yet build */ 1012 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1013 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1014 /* default */ 1015 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1016 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1017 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1018 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1019 if (issbaij) { 1020 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1021 } else { 1022 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1023 } 1024 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1025 } 1026 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1027 /* Allow user's customization */ 1028 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1029 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1030 if (!n_R) { 1031 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1032 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1033 } 1034 /* Set Up KSP for Neumann problem of BDDC */ 1035 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1036 1037 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1038 1039 /* Dirichlet */ 1040 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1041 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1042 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1043 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1044 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1045 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1046 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1047 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1048 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1049 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1050 /* need to be adapted? */ 1051 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1052 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1053 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1054 /* print info */ 1055 if (pcbddc->dbg_flag) { 1056 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1057 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1058 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1059 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1060 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1061 } 1062 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1063 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1064 } 1065 1066 /* Neumann */ 1067 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1068 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1069 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1070 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1071 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1072 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1073 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1074 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1075 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1076 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1077 /* need to be adapted? */ 1078 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1079 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1080 /* print info */ 1081 if (pcbddc->dbg_flag) { 1082 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1083 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1084 } 1085 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1086 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1087 } 1088 /* free Neumann problem's matrix */ 1089 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1095 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1096 { 1097 PetscErrorCode ierr; 1098 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1099 1100 PetscFunctionBegin; 1101 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1102 if (pcbddc->local_auxmat1) { 1103 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1104 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1105 } 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1111 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1112 { 1113 PetscErrorCode ierr; 1114 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1115 PC_IS* pcis = (PC_IS*) (pc->data); 1116 const PetscScalar zero = 0.0; 1117 1118 PetscFunctionBegin; 1119 /* Application of PHI^T (or PSI^T) */ 1120 if (pcbddc->coarse_psi_B) { 1121 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1122 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1123 } else { 1124 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1125 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1126 } 1127 /* Scatter data of coarse_rhs */ 1128 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1129 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1130 1131 /* Local solution on R nodes */ 1132 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1133 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1134 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1135 if (pcbddc->switch_static) { 1136 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1137 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 } 1139 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1140 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1141 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1143 if (pcbddc->switch_static) { 1144 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1145 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1146 } 1147 1148 /* Coarse solution */ 1149 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1150 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1151 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1152 } 1153 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1154 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1155 1156 /* Sum contributions from two levels */ 1157 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1158 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1164 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1165 { 1166 PetscErrorCode ierr; 1167 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1168 1169 PetscFunctionBegin; 1170 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1176 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1177 { 1178 PetscErrorCode ierr; 1179 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1180 1181 PetscFunctionBegin; 1182 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1183 PetscFunctionReturn(0); 1184 } 1185 1186 /* uncomment for testing purposes */ 1187 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1190 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1191 { 1192 PetscErrorCode ierr; 1193 PC_IS* pcis = (PC_IS*)(pc->data); 1194 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1195 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1196 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1197 MatType impMatType=MATSEQAIJ; 1198 /* one and zero */ 1199 PetscScalar one=1.0,zero=0.0; 1200 /* space to store constraints and their local indices */ 1201 PetscScalar *temp_quadrature_constraint; 1202 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1203 /* iterators */ 1204 PetscInt i,j,k,total_counts,temp_start_ptr; 1205 /* stuff to store connected components stored in pcbddc->mat_graph */ 1206 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1207 PetscInt n_ISForFaces,n_ISForEdges; 1208 /* near null space stuff */ 1209 MatNullSpace nearnullsp; 1210 const Vec *nearnullvecs; 1211 Vec *localnearnullsp; 1212 PetscBool nnsp_has_cnst; 1213 PetscInt nnsp_size; 1214 PetscScalar *array; 1215 /* BLAS integers */ 1216 PetscBLASInt lwork,lierr; 1217 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1218 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1219 /* LAPACK working arrays for SVD or POD */ 1220 PetscBool skip_lapack; 1221 PetscScalar *work; 1222 PetscReal *singular_vals; 1223 #if defined(PETSC_USE_COMPLEX) 1224 PetscReal *rwork; 1225 #endif 1226 #if defined(PETSC_MISSING_LAPACK_GESVD) 1227 PetscBLASInt Blas_one_2=1; 1228 PetscScalar *temp_basis,*correlation_mat; 1229 #else 1230 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1231 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1232 #endif 1233 /* change of basis */ 1234 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1235 PetscBool boolforchange,qr_needed; 1236 PetscBT touched,change_basis,qr_needed_idx; 1237 /* auxiliary stuff */ 1238 PetscInt *nnz,*is_indices,*local_to_B; 1239 /* some quantities */ 1240 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1241 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1242 1243 1244 PetscFunctionBegin; 1245 /* Get index sets for faces, edges and vertices from graph */ 1246 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1247 pcbddc->use_vertices = PETSC_TRUE; 1248 } 1249 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1250 /* HACK: provide functions to set change of basis */ 1251 if (!ISForVertices && pcbddc->NullSpace) { 1252 pcbddc->use_change_of_basis = PETSC_TRUE; 1253 pcbddc->use_change_on_faces = PETSC_FALSE; 1254 } 1255 /* print some info */ 1256 if (pcbddc->dbg_flag) { 1257 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1258 i = 0; 1259 if (ISForVertices) { 1260 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1261 } 1262 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1265 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1266 } 1267 /* check if near null space is attached to global mat */ 1268 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1269 if (nearnullsp) { 1270 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1271 } else { /* if near null space is not provided BDDC uses constants by default */ 1272 nnsp_size = 0; 1273 nnsp_has_cnst = PETSC_TRUE; 1274 } 1275 /* get max number of constraints on a single cc */ 1276 max_constraints = nnsp_size; 1277 if (nnsp_has_cnst) max_constraints++; 1278 1279 /* 1280 Evaluate maximum storage size needed by the procedure 1281 - temp_indices will contain start index of each constraint stored as follows 1282 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1283 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1284 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1285 */ 1286 total_counts = n_ISForFaces+n_ISForEdges; 1287 total_counts *= max_constraints; 1288 n_vertices = 0; 1289 if (ISForVertices) { 1290 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1291 } 1292 total_counts += n_vertices; 1293 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1294 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1295 total_counts = 0; 1296 max_size_of_constraint = 0; 1297 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1298 if (i<n_ISForEdges) { 1299 used_IS = &ISForEdges[i]; 1300 } else { 1301 used_IS = &ISForFaces[i-n_ISForEdges]; 1302 } 1303 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1304 total_counts += j; 1305 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1306 } 1307 total_counts *= max_constraints; 1308 total_counts += n_vertices; 1309 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1310 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1311 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1312 /* local to boundary numbering */ 1313 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1314 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1315 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1316 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1317 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1318 /* get local part of global near null space vectors */ 1319 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1320 for (k=0;k<nnsp_size;k++) { 1321 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1322 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1323 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1324 } 1325 1326 /* whether or not to skip lapack calls */ 1327 skip_lapack = PETSC_TRUE; 1328 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1329 1330 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1331 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1332 PetscScalar temp_work; 1333 #if defined(PETSC_MISSING_LAPACK_GESVD) 1334 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1335 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1336 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1337 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1338 #if defined(PETSC_USE_COMPLEX) 1339 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1340 #endif 1341 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1342 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1343 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1344 lwork = -1; 1345 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1346 #if !defined(PETSC_USE_COMPLEX) 1347 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1348 #else 1349 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1350 #endif 1351 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1352 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1353 #else /* on missing GESVD */ 1354 /* SVD */ 1355 PetscInt max_n,min_n; 1356 max_n = max_size_of_constraint; 1357 min_n = max_constraints; 1358 if (max_size_of_constraint < max_constraints) { 1359 min_n = max_size_of_constraint; 1360 max_n = max_constraints; 1361 } 1362 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1363 #if defined(PETSC_USE_COMPLEX) 1364 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1365 #endif 1366 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1367 lwork = -1; 1368 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1369 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1370 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1371 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1372 #if !defined(PETSC_USE_COMPLEX) 1373 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1374 #else 1375 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1376 #endif 1377 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1378 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1379 #endif /* on missing GESVD */ 1380 /* Allocate optimal workspace */ 1381 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1382 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1383 } 1384 /* Now we can loop on constraining sets */ 1385 total_counts = 0; 1386 temp_indices[0] = 0; 1387 /* vertices */ 1388 if (ISForVertices) { 1389 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1390 if (nnsp_has_cnst) { /* consider all vertices */ 1391 for (i=0;i<n_vertices;i++) { 1392 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1393 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1394 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1395 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1396 total_counts++; 1397 } 1398 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1399 PetscBool used_vertex; 1400 for (i=0;i<n_vertices;i++) { 1401 used_vertex = PETSC_FALSE; 1402 k = 0; 1403 while (!used_vertex && k<nnsp_size) { 1404 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1405 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1406 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1407 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1408 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1409 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1410 total_counts++; 1411 used_vertex = PETSC_TRUE; 1412 } 1413 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1414 k++; 1415 } 1416 } 1417 } 1418 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1419 n_vertices = total_counts; 1420 } 1421 1422 /* edges and faces */ 1423 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1424 if (i<n_ISForEdges) { 1425 used_IS = &ISForEdges[i]; 1426 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1427 } else { 1428 used_IS = &ISForFaces[i-n_ISForEdges]; 1429 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1430 } 1431 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1432 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1433 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1434 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1435 /* change of basis should not be performed on local periodic nodes */ 1436 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1437 if (nnsp_has_cnst) { 1438 PetscScalar quad_value; 1439 temp_constraints++; 1440 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1441 for (j=0;j<size_of_constraint;j++) { 1442 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1443 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1444 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1445 } 1446 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1447 total_counts++; 1448 } 1449 for (k=0;k<nnsp_size;k++) { 1450 PetscReal real_value; 1451 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1452 for (j=0;j<size_of_constraint;j++) { 1453 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1454 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1455 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1456 } 1457 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1458 /* check if array is null on the connected component */ 1459 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1460 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1461 if (real_value > 0.0) { /* keep indices and values */ 1462 temp_constraints++; 1463 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1464 total_counts++; 1465 } 1466 } 1467 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1468 valid_constraints = temp_constraints; 1469 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1470 if (!pcbddc->use_nnsp_true && temp_constraints) { 1471 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1472 1473 #if defined(PETSC_MISSING_LAPACK_GESVD) 1474 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1475 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1476 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1477 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1478 from that computed using LAPACKgesvd 1479 -> This is due to a different computation of eigenvectors in LAPACKheev 1480 -> The quality of the POD-computed basis will be the same */ 1481 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1482 /* Store upper triangular part of correlation matrix */ 1483 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1484 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1485 for (j=0;j<temp_constraints;j++) { 1486 for (k=0;k<j+1;k++) { 1487 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1488 } 1489 } 1490 /* compute eigenvalues and eigenvectors of correlation matrix */ 1491 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1492 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1493 #if !defined(PETSC_USE_COMPLEX) 1494 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1495 #else 1496 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1497 #endif 1498 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1499 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1500 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1501 j=0; 1502 while (j < temp_constraints && singular_vals[j] < tol) j++; 1503 total_counts=total_counts-j; 1504 valid_constraints = temp_constraints-j; 1505 /* scale and copy POD basis into used quadrature memory */ 1506 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1507 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1508 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1509 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1510 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1511 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1512 if (j<temp_constraints) { 1513 PetscInt ii; 1514 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1515 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1516 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1517 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1518 for (k=0;k<temp_constraints-j;k++) { 1519 for (ii=0;ii<size_of_constraint;ii++) { 1520 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1521 } 1522 } 1523 } 1524 #else /* on missing GESVD */ 1525 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1526 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1527 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1528 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1529 #if !defined(PETSC_USE_COMPLEX) 1530 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1531 #else 1532 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1533 #endif 1534 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1535 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1536 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1537 k = temp_constraints; 1538 if (k > size_of_constraint) k = size_of_constraint; 1539 j = 0; 1540 while (j < k && singular_vals[k-j-1] < tol) j++; 1541 total_counts = total_counts-temp_constraints+k-j; 1542 valid_constraints = k-j; 1543 #endif /* on missing GESVD */ 1544 } 1545 /* setting change_of_basis flag is safe now */ 1546 if (boolforchange) { 1547 for (j=0;j<valid_constraints;j++) { 1548 PetscBTSet(change_basis,total_counts-j-1); 1549 } 1550 } 1551 } 1552 /* free index sets of faces, edges and vertices */ 1553 for (i=0;i<n_ISForFaces;i++) { 1554 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1555 } 1556 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1557 for (i=0;i<n_ISForEdges;i++) { 1558 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1559 } 1560 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1561 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1562 1563 /* free workspace */ 1564 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1565 ierr = PetscFree(work);CHKERRQ(ierr); 1566 #if defined(PETSC_USE_COMPLEX) 1567 ierr = PetscFree(rwork);CHKERRQ(ierr); 1568 #endif 1569 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1570 #if defined(PETSC_MISSING_LAPACK_GESVD) 1571 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1572 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1573 #endif 1574 } 1575 for (k=0;k<nnsp_size;k++) { 1576 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1577 } 1578 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1579 1580 /* set quantities in pcbddc data structure */ 1581 /* n_vertices defines the number of subdomain corners in the primal space */ 1582 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1583 pcbddc->local_primal_size = total_counts; 1584 pcbddc->n_vertices = n_vertices; 1585 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1586 1587 /* Create constraint matrix */ 1588 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1589 /* so we need to set it up properly either with or without change of basis */ 1590 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1591 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1592 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1593 /* array to compute a local numbering of constraints : vertices first then constraints */ 1594 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1595 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1596 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1597 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1598 /* auxiliary stuff for basis change */ 1599 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1600 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1601 1602 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1603 total_primal_vertices=0; 1604 for (i=0;i<pcbddc->local_primal_size;i++) { 1605 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1606 if (size_of_constraint == 1) { 1607 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1608 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1609 aux_primal_minloc[total_primal_vertices]=0; 1610 total_primal_vertices++; 1611 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1612 PetscInt min_loc,min_index; 1613 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1614 /* find first untouched local node */ 1615 k = 0; 1616 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1617 min_index = global_indices[k]; 1618 min_loc = k; 1619 /* search the minimum among global nodes already untouched on the cc */ 1620 for (k=1;k<size_of_constraint;k++) { 1621 /* there can be more than one constraint on a single connected component */ 1622 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1623 min_index = global_indices[k]; 1624 min_loc = k; 1625 } 1626 } 1627 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1628 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1629 aux_primal_minloc[total_primal_vertices]=min_loc; 1630 total_primal_vertices++; 1631 } 1632 } 1633 /* determine if a QR strategy is needed for change of basis */ 1634 qr_needed = PETSC_FALSE; 1635 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1636 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1637 if (PetscBTLookup(change_basis,i)) { 1638 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1639 j = 0; 1640 for (k=0;k<size_of_constraint;k++) { 1641 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1642 j++; 1643 } 1644 } 1645 /* found more than one primal dof on the cc */ 1646 if (j > 1) { 1647 PetscBTSet(qr_needed_idx,i); 1648 qr_needed = PETSC_TRUE; 1649 } 1650 } 1651 } 1652 /* free workspace */ 1653 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1654 1655 /* permute indices in order to have a sorted set of vertices */ 1656 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1657 1658 /* nonzero structure of constraint matrix */ 1659 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1660 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1661 j=total_primal_vertices; 1662 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1663 if (!PetscBTLookup(change_basis,i)) { 1664 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1665 j++; 1666 } 1667 } 1668 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1669 ierr = PetscFree(nnz);CHKERRQ(ierr); 1670 /* set values in constraint matrix */ 1671 for (i=0;i<total_primal_vertices;i++) { 1672 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1673 } 1674 total_counts = total_primal_vertices; 1675 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1676 if (!PetscBTLookup(change_basis,i)) { 1677 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1678 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1679 total_counts++; 1680 } 1681 } 1682 /* assembling */ 1683 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1684 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1685 /* 1686 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1687 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1688 */ 1689 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1690 if (pcbddc->use_change_of_basis) { 1691 /* dual and primal dofs on a single cc */ 1692 PetscInt dual_dofs,primal_dofs; 1693 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1694 PetscInt primal_counter; 1695 /* working stuff for GEQRF */ 1696 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1697 PetscBLASInt lqr_work; 1698 /* working stuff for UNGQR */ 1699 PetscScalar *gqr_work,lgqr_work_t; 1700 PetscBLASInt lgqr_work; 1701 /* working stuff for TRTRS */ 1702 PetscScalar *trs_rhs; 1703 PetscBLASInt Blas_NRHS; 1704 /* pointers for values insertion into change of basis matrix */ 1705 PetscInt *start_rows,*start_cols; 1706 PetscScalar *start_vals; 1707 /* working stuff for values insertion */ 1708 PetscBT is_primal; 1709 1710 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1711 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1712 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1713 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1714 /* work arrays */ 1715 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1716 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1717 /* nonzeros per row */ 1718 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1719 if (PetscBTLookup(change_basis,i)) { 1720 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1721 if (PetscBTLookup(qr_needed_idx,i)) { 1722 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1723 } else { 1724 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1725 /* get local primal index on the cc */ 1726 j = 0; 1727 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1728 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1729 } 1730 } 1731 } 1732 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1733 ierr = PetscFree(nnz);CHKERRQ(ierr); 1734 /* Set initial identity in the matrix */ 1735 for (i=0;i<pcis->n_B;i++) { 1736 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1737 } 1738 1739 if (pcbddc->dbg_flag) { 1740 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1741 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1742 } 1743 1744 1745 /* Now we loop on the constraints which need a change of basis */ 1746 /* 1747 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1748 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1749 1750 Basic blocks of change of basis matrix T computed 1751 1752 - Using the following block transformation if there is only a primal dof on the cc 1753 (in the example, primal dof is the last one of the edge in LOCAL ordering 1754 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1755 | 1 0 ... 0 1 | 1756 | 0 1 ... 0 1 | 1757 | ... | 1758 | 0 ... 1 1 | 1759 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1760 1761 - via QR decomposition of constraints otherwise 1762 */ 1763 if (qr_needed) { 1764 /* space to store Q */ 1765 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1766 /* first we issue queries for optimal work */ 1767 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1768 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1769 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1770 lqr_work = -1; 1771 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1772 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1773 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1774 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1775 lgqr_work = -1; 1776 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1777 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1778 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1779 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1780 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1781 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1782 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1783 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1784 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1785 /* array to store scaling factors for reflectors */ 1786 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1787 /* array to store rhs and solution of triangular solver */ 1788 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1789 /* allocating workspace for check */ 1790 if (pcbddc->dbg_flag) { 1791 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1792 } 1793 } 1794 /* array to store whether a node is primal or not */ 1795 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1796 for (i=0;i<total_primal_vertices;i++) { 1797 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1798 } 1799 1800 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1801 /* -> using implicit ordering contained in temp_indices data */ 1802 total_counts = pcbddc->n_vertices; 1803 primal_counter = total_counts; 1804 while (total_counts<pcbddc->local_primal_size) { 1805 primal_dofs = 1; 1806 if (PetscBTLookup(change_basis,total_counts)) { 1807 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1808 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1809 primal_dofs++; 1810 } 1811 /* get constraint info */ 1812 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1813 dual_dofs = size_of_constraint-primal_dofs; 1814 1815 if (pcbddc->dbg_flag) { 1816 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1817 } 1818 1819 if (primal_dofs > 1) { /* QR */ 1820 1821 /* copy quadrature constraints for change of basis check */ 1822 if (pcbddc->dbg_flag) { 1823 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1824 } 1825 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1826 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1827 1828 /* compute QR decomposition of constraints */ 1829 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1830 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1831 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1832 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1833 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1834 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1835 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1836 1837 /* explictly compute R^-T */ 1838 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1839 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1840 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1841 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1842 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1843 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1844 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1845 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1846 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1847 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1848 1849 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1850 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1851 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1852 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1853 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1854 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1855 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1856 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1857 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1858 1859 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1860 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1861 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1862 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1863 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1864 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1865 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1867 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1868 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1869 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1870 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1871 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1872 1873 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1874 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1875 /* insert cols for primal dofs */ 1876 for (j=0;j<primal_dofs;j++) { 1877 start_vals = &qr_basis[j*size_of_constraint]; 1878 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1879 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1880 } 1881 /* insert cols for dual dofs */ 1882 for (j=0,k=0;j<dual_dofs;k++) { 1883 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1884 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1885 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1886 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1887 j++; 1888 } 1889 } 1890 1891 /* check change of basis */ 1892 if (pcbddc->dbg_flag) { 1893 PetscInt ii,jj; 1894 PetscBool valid_qr=PETSC_TRUE; 1895 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1896 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1897 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1898 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1899 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1900 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1901 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1902 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1903 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1904 for (jj=0;jj<size_of_constraint;jj++) { 1905 for (ii=0;ii<primal_dofs;ii++) { 1906 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1907 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1908 } 1909 } 1910 if (!valid_qr) { 1911 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1912 for (jj=0;jj<size_of_constraint;jj++) { 1913 for (ii=0;ii<primal_dofs;ii++) { 1914 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1915 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1916 } 1917 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1918 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1919 } 1920 } 1921 } 1922 } else { 1923 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1924 } 1925 } 1926 } else { /* simple transformation block */ 1927 PetscInt row,col; 1928 PetscScalar val; 1929 for (j=0;j<size_of_constraint;j++) { 1930 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1931 if (!PetscBTLookup(is_primal,row)) { 1932 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1933 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1934 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1935 } else { 1936 for (k=0;k<size_of_constraint;k++) { 1937 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1938 if (row != col) { 1939 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1940 } else { 1941 val = 1.0; 1942 } 1943 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1944 } 1945 } 1946 } 1947 } 1948 /* increment primal counter */ 1949 primal_counter += primal_dofs; 1950 } else { 1951 if (pcbddc->dbg_flag) { 1952 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1953 } 1954 } 1955 /* increment constraint counter total_counts */ 1956 total_counts += primal_dofs; 1957 } 1958 1959 /* free workspace */ 1960 if (qr_needed) { 1961 if (pcbddc->dbg_flag) { 1962 ierr = PetscFree(work);CHKERRQ(ierr); 1963 } 1964 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1965 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1966 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1967 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1968 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1969 } 1970 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1971 /* assembling */ 1972 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1973 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1974 /* 1975 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1976 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1977 */ 1978 } 1979 1980 /* flush dbg viewer */ 1981 if (pcbddc->dbg_flag) { 1982 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1983 } 1984 1985 /* free workspace */ 1986 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1987 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 1988 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1989 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1990 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1991 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1992 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1993 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1994 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1995 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1996 PetscFunctionReturn(0); 1997 } 1998 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2001 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2002 { 2003 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2004 PC_IS *pcis = (PC_IS*)pc->data; 2005 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2006 PetscInt bs,ierr,i,vertex_size; 2007 PetscViewer viewer=pcbddc->dbg_viewer; 2008 2009 PetscFunctionBegin; 2010 /* Init local Graph struct */ 2011 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2012 2013 /* Check validity of the csr graph passed in by the user */ 2014 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2015 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2016 } 2017 2018 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2019 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2020 Mat mat_adj; 2021 const PetscInt *xadj,*adjncy; 2022 PetscBool flg_row=PETSC_TRUE; 2023 2024 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2025 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2026 if (!flg_row) { 2027 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2028 } 2029 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2030 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2031 if (!flg_row) { 2032 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2033 } 2034 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2035 } 2036 2037 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2038 vertex_size = 1; 2039 if (!pcbddc->user_provided_isfordofs) { 2040 if (!pcbddc->n_ISForDofs) { 2041 IS *custom_ISForDofs; 2042 2043 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2044 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2045 for (i=0;i<bs;i++) { 2046 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2047 } 2048 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2049 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2050 /* remove my references to IS objects */ 2051 for (i=0;i<bs;i++) { 2052 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2053 } 2054 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2055 } 2056 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2057 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2058 } 2059 2060 /* Setup of Graph */ 2061 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2062 2063 /* Graph's connected components analysis */ 2064 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2065 2066 /* print some info to stdout */ 2067 if (pcbddc->dbg_flag) { 2068 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2069 } 2070 PetscFunctionReturn(0); 2071 } 2072 2073 #undef __FUNCT__ 2074 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2075 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2076 { 2077 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2078 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2079 PetscErrorCode ierr; 2080 2081 PetscFunctionBegin; 2082 n = 0; 2083 vertices = 0; 2084 if (pcbddc->ConstraintMatrix) { 2085 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2086 for (i=0;i<local_primal_size;i++) { 2087 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2088 if (size_of_constraint == 1) n++; 2089 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2090 } 2091 if (vertices_idx) { 2092 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2093 n = 0; 2094 for (i=0;i<local_primal_size;i++) { 2095 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2096 if (size_of_constraint == 1) { 2097 vertices[n++]=row_cmat_indices[0]; 2098 } 2099 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2100 } 2101 } 2102 } 2103 *n_vertices = n; 2104 if (vertices_idx) *vertices_idx = vertices; 2105 PetscFunctionReturn(0); 2106 } 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2110 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2111 { 2112 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2113 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2114 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2115 PetscBT touched; 2116 PetscErrorCode ierr; 2117 2118 /* This function assumes that the number of local constraints per connected component 2119 is not greater than the number of nodes defined for the connected component 2120 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2121 PetscFunctionBegin; 2122 n = 0; 2123 constraints_index = 0; 2124 if (pcbddc->ConstraintMatrix) { 2125 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2126 max_size_of_constraint = 0; 2127 for (i=0;i<local_primal_size;i++) { 2128 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2129 if (size_of_constraint > 1) { 2130 n++; 2131 } 2132 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2133 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2134 } 2135 if (constraints_idx) { 2136 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2137 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2138 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2139 n = 0; 2140 for (i=0;i<local_primal_size;i++) { 2141 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2142 if (size_of_constraint > 1) { 2143 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2144 /* find first untouched local node */ 2145 j = 0; 2146 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2147 min_index = row_cmat_global_indices[j]; 2148 min_loc = j; 2149 /* search the minimum among nodes not yet touched on the connected component 2150 since there can be more than one constraint on a single cc */ 2151 for (j=1;j<size_of_constraint;j++) { 2152 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2153 min_index = row_cmat_global_indices[j]; 2154 min_loc = j; 2155 } 2156 } 2157 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2158 constraints_index[n++] = row_cmat_indices[min_loc]; 2159 } 2160 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2161 } 2162 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2163 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2164 } 2165 } 2166 *n_constraints = n; 2167 if (constraints_idx) *constraints_idx = constraints_index; 2168 PetscFunctionReturn(0); 2169 } 2170 2171 /* the next two functions has been adapted from pcis.c */ 2172 #undef __FUNCT__ 2173 #define __FUNCT__ "PCBDDCApplySchur" 2174 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2175 { 2176 PetscErrorCode ierr; 2177 PC_IS *pcis = (PC_IS*)(pc->data); 2178 2179 PetscFunctionBegin; 2180 if (!vec2_B) { vec2_B = v; } 2181 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2182 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2183 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2184 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2185 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2191 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2192 { 2193 PetscErrorCode ierr; 2194 PC_IS *pcis = (PC_IS*)(pc->data); 2195 2196 PetscFunctionBegin; 2197 if (!vec2_B) { vec2_B = v; } 2198 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2199 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2200 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2201 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2202 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "PCBDDCSubsetNumbering" 2208 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2209 { 2210 Vec local_vec,global_vec; 2211 IS seqis,paris; 2212 VecScatter scatter_ctx; 2213 PetscScalar *array; 2214 PetscInt *temp_global_dofs; 2215 PetscScalar globalsum; 2216 PetscInt i,j,s; 2217 PetscInt nlocals,first_index,old_index,max_local; 2218 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2219 PetscMPIInt *dof_sizes,*dof_displs; 2220 PetscBool first_found; 2221 PetscErrorCode ierr; 2222 2223 PetscFunctionBegin; 2224 /* mpi buffers */ 2225 MPI_Comm_size(comm,&size_prec_comm); 2226 MPI_Comm_rank(comm,&rank_prec_comm); 2227 j = ( !rank_prec_comm ? size_prec_comm : 0); 2228 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2229 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2230 /* get maximum size of subset */ 2231 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2232 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2233 max_local = 0; 2234 if (n_local_dofs) { 2235 max_local = temp_global_dofs[0]; 2236 for (i=1;i<n_local_dofs;i++) { 2237 if (max_local < temp_global_dofs[i] ) { 2238 max_local = temp_global_dofs[i]; 2239 } 2240 } 2241 } 2242 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2243 max_global++; 2244 max_local = 0; 2245 if (n_local_dofs) { 2246 max_local = local_dofs[0]; 2247 for (i=1;i<n_local_dofs;i++) { 2248 if (max_local < local_dofs[i] ) { 2249 max_local = local_dofs[i]; 2250 } 2251 } 2252 } 2253 max_local++; 2254 /* allocate workspace */ 2255 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2256 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2257 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2258 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2259 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2260 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2261 /* create scatter */ 2262 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2263 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2264 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2265 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2266 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2267 /* init array */ 2268 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2269 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2270 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2271 if (local_dofs_mult) { 2272 for (i=0;i<n_local_dofs;i++) { 2273 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2274 } 2275 } else { 2276 for (i=0;i<n_local_dofs;i++) { 2277 array[local_dofs[i]]=1.0; 2278 } 2279 } 2280 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2281 /* scatter into global vec and get total number of global dofs */ 2282 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2283 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2284 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2285 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2286 /* Fill global_vec with cumulative function for global numbering */ 2287 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2288 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2289 nlocals = 0; 2290 first_index = -1; 2291 first_found = PETSC_FALSE; 2292 for (i=0;i<s;i++) { 2293 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2294 first_found = PETSC_TRUE; 2295 first_index = i; 2296 } 2297 nlocals += (PetscInt)PetscRealPart(array[i]); 2298 } 2299 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2300 if (!rank_prec_comm) { 2301 dof_displs[0]=0; 2302 for (i=1;i<size_prec_comm;i++) { 2303 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2304 } 2305 } 2306 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2307 if (first_found) { 2308 array[first_index] += (PetscScalar)nlocals; 2309 old_index = first_index; 2310 for (i=first_index+1;i<s;i++) { 2311 if (PetscRealPart(array[i]) > 0.0) { 2312 array[i] += array[old_index]; 2313 old_index = i; 2314 } 2315 } 2316 } 2317 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2318 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2319 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2320 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2321 /* get global ordering of local dofs */ 2322 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2323 if (local_dofs_mult) { 2324 for (i=0;i<n_local_dofs;i++) { 2325 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2326 } 2327 } else { 2328 for (i=0;i<n_local_dofs;i++) { 2329 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2330 } 2331 } 2332 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2333 /* free workspace */ 2334 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2335 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2336 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2337 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2338 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2339 /* return pointer to global ordering of local dofs */ 2340 *global_numbering_subset = temp_global_dofs; 2341 PetscFunctionReturn(0); 2342 } 2343 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2346 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2347 { 2348 PetscInt i,j; 2349 PetscScalar *alphas; 2350 PetscErrorCode ierr; 2351 2352 PetscFunctionBegin; 2353 /* this implements stabilized Gram-Schmidt */ 2354 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2355 for (i=0;i<n;i++) { 2356 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2357 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2358 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2359 } 2360 ierr = PetscFree(alphas);CHKERRQ(ierr); 2361 PetscFunctionReturn(0); 2362 } 2363 2364 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatConvert_IS_AIJ" 2367 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2368 { 2369 Mat new_mat; 2370 Mat_IS *matis = (Mat_IS*)(mat->data); 2371 /* info on mat */ 2372 PetscInt bs,rows,cols; 2373 PetscInt lrows,lcols; 2374 PetscInt local_rows,local_cols; 2375 PetscMPIInt nsubdomains; 2376 /* preallocation */ 2377 Vec vec_dnz,vec_onz; 2378 PetscScalar *my_dnz,*my_onz,*array; 2379 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2380 PetscInt index_row,index_col,owner; 2381 PetscInt *local_indices,*global_indices; 2382 /* work */ 2383 PetscInt i,j; 2384 PetscErrorCode ierr; 2385 2386 PetscFunctionBegin; 2387 /* CHECKS */ 2388 /* get info from mat */ 2389 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2390 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2391 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2392 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2393 2394 /* MAT_INITIAL_MATRIX */ 2395 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2396 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2397 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2398 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2399 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2400 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2401 2402 /* 2403 preallocation 2404 */ 2405 2406 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2407 /* 2408 Some vectors are needed to sum up properly on shared interface dofs. 2409 Note that preallocation is not exact, since it overestimates nonzeros 2410 */ 2411 /* 2412 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2413 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2414 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2415 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2416 */ 2417 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2418 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2419 /* All processes need to compute entire row ownership */ 2420 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2421 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2422 for (i=0;i<nsubdomains;i++) { 2423 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2424 row_ownership[j]=i; 2425 } 2426 } 2427 /* map indices of local mat to global values */ 2428 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2429 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2430 for (i=0;i<local_rows;i++) local_indices[i]=i; 2431 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2432 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2433 2434 /* 2435 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2436 then, they will be summed up properly. This way, preallocation is always sufficient 2437 */ 2438 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2439 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2440 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2441 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2442 for (i=0;i<local_rows;i++) { 2443 index_row = global_indices[i]; 2444 for (j=i;j<local_rows;j++) { 2445 owner = row_ownership[index_row]; 2446 index_col = global_indices[j]; 2447 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2448 my_dnz[i] += 1.0; 2449 } else { /* offdiag block */ 2450 my_onz[i] += 1.0; 2451 } 2452 /* same as before, interchanging rows and cols */ 2453 if (i != j) { 2454 owner = row_ownership[index_col]; 2455 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2456 my_dnz[j] += 1.0; 2457 } else { 2458 my_onz[j] += 1.0; 2459 } 2460 } 2461 } 2462 } 2463 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2464 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2465 if (local_rows) { /* multilevel guard */ 2466 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2467 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2468 } 2469 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2470 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2471 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2472 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2473 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2474 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2475 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2476 2477 /* set computed preallocation in dnz and onz */ 2478 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2479 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2480 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2481 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2482 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2483 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2484 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2485 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2486 2487 /* Resize preallocation if overestimated */ 2488 for (i=0;i<lrows;i++) { 2489 dnz[i] = PetscMin(dnz[i],lcols); 2490 onz[i] = PetscMin(onz[i],cols-lcols); 2491 } 2492 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2493 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2494 2495 /* 2496 Set values. Very Basic. 2497 */ 2498 for (i=0;i<local_rows;i++) { 2499 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2500 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2501 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2502 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2503 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2504 } 2505 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2506 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2507 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2508 2509 /* get back mat */ 2510 *M = new_mat; 2511 PetscFunctionReturn(0); 2512 } 2513 2514 #undef __FUNCT__ 2515 #define __FUNCT__ "MatISSubassemble_Private" 2516 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2517 { 2518 Mat subdomain_adj; 2519 IS new_ranks,ranks_send_to; 2520 MatPartitioning partitioner; 2521 Mat_IS *matis; 2522 PetscInt n_neighs,*neighs,*n_shared,**shared; 2523 PetscInt prank; 2524 PetscMPIInt size,rank,color; 2525 PetscInt *xadj,*adjncy,*oldranks; 2526 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2527 PetscInt i,j,n_subdomains,local_size,threshold=0; 2528 PetscErrorCode ierr; 2529 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2530 PetscSubcomm subcomm; 2531 2532 PetscFunctionBegin; 2533 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2534 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2535 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2536 2537 /* Get info on mapping */ 2538 matis = (Mat_IS*)(mat->data); 2539 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2540 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2541 2542 /* build local CSR graph of subdomains' connectivity */ 2543 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2544 xadj[0] = 0; 2545 xadj[1] = PetscMax(n_neighs-1,0); 2546 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2547 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2548 2549 if (threshold) { 2550 PetscInt* count,min_threshold; 2551 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2552 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2553 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2554 for (j=0;j<n_shared[i];j++) { 2555 count[shared[i][j]] += 1; 2556 } 2557 } 2558 /* adapt threshold since we dont want to lose significant connections */ 2559 min_threshold = n_neighs; 2560 for (i=1;i<n_neighs;i++) { 2561 for (j=0;j<n_shared[i];j++) { 2562 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2563 } 2564 } 2565 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2566 threshold = PetscMax(min_threshold+1,threshold); 2567 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2568 xadj[1] = 0; 2569 for (i=1;i<n_neighs;i++) { 2570 for (j=0;j<n_shared[i];j++) { 2571 if (count[shared[i][j]] < threshold) { 2572 adjncy[xadj[1]] = neighs[i]; 2573 adjncy_wgt[xadj[1]] = n_shared[i]; 2574 xadj[1]++; 2575 break; 2576 } 2577 } 2578 } 2579 ierr = PetscFree(count);CHKERRQ(ierr); 2580 } else { 2581 if (xadj[1]) { 2582 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2583 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2584 } 2585 } 2586 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2587 if (use_square) { 2588 for (i=0;i<xadj[1];i++) { 2589 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2590 } 2591 } 2592 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2593 2594 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2595 2596 /* 2597 Restrict work on active processes only. 2598 */ 2599 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2600 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2601 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2602 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2603 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2604 if (color) { 2605 ierr = PetscFree(xadj);CHKERRQ(ierr); 2606 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2607 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2608 } else { 2609 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2610 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2611 prank = rank; 2612 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2613 for (i=0;i<size;i++) { 2614 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2615 } 2616 for (i=0;i<xadj[1];i++) { 2617 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2618 } 2619 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2620 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2621 n_subdomains = (PetscInt)size; 2622 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2623 2624 /* Partition */ 2625 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2626 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2627 if (use_vwgt) { 2628 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2629 v_wgt[0] = local_size; 2630 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2631 } 2632 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2633 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2634 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2635 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2636 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2637 2638 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2639 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2640 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2641 /* clean up */ 2642 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2643 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2644 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2645 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2646 } 2647 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2648 2649 /* assemble parallel IS for sends */ 2650 i = 1; 2651 if (color) i=0; 2652 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2653 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2654 2655 /* get back IS */ 2656 *is_sends = ranks_send_to; 2657 PetscFunctionReturn(0); 2658 } 2659 2660 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "MatISSubassemble" 2664 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2665 { 2666 Mat local_mat,new_mat; 2667 Mat_IS *matis; 2668 IS is_sends_internal; 2669 PetscInt rows,cols; 2670 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2671 PetscBool ismatis,isdense; 2672 ISLocalToGlobalMapping l2gmap; 2673 PetscInt* l2gmap_indices; 2674 const PetscInt* is_indices; 2675 MatType new_local_type; 2676 MatTypePrivate new_local_type_private; 2677 /* buffers */ 2678 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2679 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2680 /* MPI */ 2681 MPI_Comm comm; 2682 PetscMPIInt n_sends,n_recvs,commsize; 2683 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2684 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2685 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2686 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2687 PetscErrorCode ierr; 2688 2689 PetscFunctionBegin; 2690 /* checks */ 2691 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2692 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2693 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2694 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2695 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2696 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2697 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2698 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2699 PetscValidLogicalCollectiveInt(mat,bs,0); 2700 /* prepare IS for sending if not provided */ 2701 if (!is_sends) { 2702 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2703 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2704 } else { 2705 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2706 is_sends_internal = is_sends; 2707 } 2708 2709 /* get pointer of MATIS data */ 2710 matis = (Mat_IS*)mat->data; 2711 2712 /* get comm */ 2713 comm = PetscObjectComm((PetscObject)mat); 2714 2715 /* compute number of sends */ 2716 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2717 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2718 2719 /* compute number of receives */ 2720 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2721 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2722 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2723 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2724 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2725 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2726 ierr = PetscFree(iflags);CHKERRQ(ierr); 2727 2728 /* prepare send/receive buffers */ 2729 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2730 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2731 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2732 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2733 2734 /* Get data from local mat */ 2735 if (!isdense) { 2736 /* TODO: See below some guidelines on how to prepare the local buffers */ 2737 /* 2738 send_buffer_vals should contain the raw values of the local matrix 2739 send_buffer_idxs should contain: 2740 - MatType_PRIVATE type 2741 - PetscInt size_of_l2gmap 2742 - PetscInt global_row_indices[size_of_l2gmap] 2743 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2744 */ 2745 } else { 2746 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2747 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2748 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2749 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2750 send_buffer_idxs[1] = i; 2751 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2752 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2753 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2754 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2755 for (i=0;i<n_sends;i++) { 2756 ilengths_vals[is_indices[i]] = len*len; 2757 ilengths_idxs[is_indices[i]] = len+2; 2758 } 2759 } 2760 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2761 buf_size_idxs = 0; 2762 buf_size_vals = 0; 2763 for (i=0;i<n_recvs;i++) { 2764 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2765 buf_size_vals += (PetscInt)olengths_vals[i]; 2766 } 2767 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2768 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2769 2770 /* get new tags for clean communications */ 2771 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2772 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2773 2774 /* allocate for requests */ 2775 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2776 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2777 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2778 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2779 2780 /* communications */ 2781 ptr_idxs = recv_buffer_idxs; 2782 ptr_vals = recv_buffer_vals; 2783 for (i=0;i<n_recvs;i++) { 2784 source_dest = onodes[i]; 2785 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2786 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2787 ptr_idxs += olengths_idxs[i]; 2788 ptr_vals += olengths_vals[i]; 2789 } 2790 for (i=0;i<n_sends;i++) { 2791 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2792 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2793 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2794 } 2795 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2796 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2797 2798 /* assemble new l2g map */ 2799 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2800 ptr_idxs = recv_buffer_idxs; 2801 buf_size_idxs = 0; 2802 for (i=0;i<n_recvs;i++) { 2803 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2804 ptr_idxs += olengths_idxs[i]; 2805 } 2806 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2807 ptr_idxs = recv_buffer_idxs; 2808 buf_size_idxs = 0; 2809 for (i=0;i<n_recvs;i++) { 2810 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2811 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2812 ptr_idxs += olengths_idxs[i]; 2813 } 2814 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2815 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2816 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2817 2818 /* infer new local matrix type from received local matrices type */ 2819 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2820 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2821 new_local_type_private = MATAIJ_PRIVATE; 2822 new_local_type = MATSEQAIJ; 2823 if (n_recvs) { 2824 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2825 ptr_idxs = recv_buffer_idxs; 2826 for (i=0;i<n_recvs;i++) { 2827 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2828 new_local_type_private = MATAIJ_PRIVATE; 2829 break; 2830 } 2831 ptr_idxs += olengths_idxs[i]; 2832 } 2833 switch (new_local_type_private) { 2834 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2835 new_local_type = MATSEQAIJ; 2836 bs = 1; 2837 break; 2838 case MATAIJ_PRIVATE: 2839 new_local_type = MATSEQAIJ; 2840 bs = 1; 2841 break; 2842 case MATBAIJ_PRIVATE: 2843 new_local_type = MATSEQBAIJ; 2844 break; 2845 case MATSBAIJ_PRIVATE: 2846 new_local_type = MATSEQSBAIJ; 2847 break; 2848 default: 2849 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2850 break; 2851 } 2852 } 2853 2854 /* create MATIS object */ 2855 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2856 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2857 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2858 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2859 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2860 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2861 2862 /* set values */ 2863 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2864 ptr_vals = recv_buffer_vals; 2865 ptr_idxs = recv_buffer_idxs; 2866 for (i=0;i<n_recvs;i++) { 2867 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2868 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2869 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2870 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2871 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2872 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2873 } 2874 ptr_idxs += olengths_idxs[i]; 2875 ptr_vals += olengths_vals[i]; 2876 } 2877 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2878 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2879 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2880 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2881 2882 { /* check */ 2883 Vec lvec,rvec; 2884 PetscReal infty_error; 2885 2886 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2887 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2888 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2889 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2890 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2891 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2892 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2893 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2894 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2895 } 2896 2897 /* free workspace */ 2898 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2899 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2900 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2901 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2902 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2903 if (isdense) { 2904 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2905 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2906 } else { 2907 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2908 } 2909 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2910 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2911 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2912 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2913 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2914 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2915 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2916 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2917 ierr = PetscFree(onodes);CHKERRQ(ierr); 2918 /* get back new mat */ 2919 *mat_n = new_mat; 2920 PetscFunctionReturn(0); 2921 } 2922 2923 #undef __FUNCT__ 2924 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2925 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2926 { 2927 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2928 PC_IS *pcis = (PC_IS*)pc->data; 2929 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2930 MatNullSpace CoarseNullSpace=NULL; 2931 ISLocalToGlobalMapping coarse_islg; 2932 IS coarse_is; 2933 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2934 PetscInt im_active=-1,active_procs=-1; 2935 PC pc_temp; 2936 PCType coarse_pc_type; 2937 KSPType coarse_ksp_type; 2938 PetscBool multilevel_requested,multilevel_allowed; 2939 PetscBool setsym,issym,isbddc,isnn; 2940 MatStructure matstruct; 2941 PetscErrorCode ierr; 2942 2943 PetscFunctionBegin; 2944 /* Assign global numbering to coarse dofs */ 2945 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2946 2947 /* infer some info from user */ 2948 issym = PETSC_FALSE; 2949 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2950 multilevel_allowed = PETSC_FALSE; 2951 multilevel_requested = PETSC_FALSE; 2952 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2953 if (multilevel_requested) { 2954 /* count "active processes" */ 2955 im_active = !!(pcis->n); 2956 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2957 if (active_procs/pcbddc->coarsening_ratio < 2) { 2958 multilevel_allowed = PETSC_FALSE; 2959 } else { 2960 multilevel_allowed = PETSC_TRUE; 2961 } 2962 } 2963 2964 /* set defaults for coarse KSP and PC */ 2965 if (multilevel_allowed) { 2966 if (issym) { 2967 coarse_ksp_type = KSPRICHARDSON; 2968 } else { 2969 coarse_ksp_type = KSPCHEBYSHEV; 2970 } 2971 coarse_pc_type = PCBDDC; 2972 } else { 2973 coarse_ksp_type = KSPPREONLY; 2974 coarse_pc_type = PCREDUNDANT; 2975 } 2976 2977 /* create the coarse KSP object only once with defaults */ 2978 if (!pcbddc->coarse_ksp) { 2979 char prefix[256],str_level[3]; 2980 size_t len; 2981 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2982 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2983 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2984 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2985 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2986 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2987 /* prefix */ 2988 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 2989 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2990 if (!pcbddc->current_level) { 2991 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2992 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 2993 } else { 2994 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2995 if (pcbddc->current_level>1) len -= 2; 2996 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 2997 *(prefix+len)='\0'; 2998 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 2999 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3000 } 3001 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3002 } 3003 /* allow user customization */ 3004 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3005 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3006 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3007 3008 /* get some info after set from options */ 3009 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3010 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3011 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3012 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3013 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3014 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3015 isbddc = PETSC_FALSE; 3016 } 3017 3018 /* propagate BDDC info to the next level */ 3019 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3020 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3021 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3022 3023 /* creates temporary MATIS object for coarse matrix */ 3024 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3025 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3026 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3027 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3028 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3029 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3030 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3031 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3032 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3033 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3034 3035 /* assemble coarse matrix */ 3036 if (isbddc || isnn) { 3037 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3038 } else { 3039 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3040 } 3041 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3042 3043 /* create local to global scatters for coarse problem */ 3044 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3045 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3046 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3047 3048 /* propagate symmetry info to coarse matrix */ 3049 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3050 3051 /* Compute coarse null space (special handling by BDDC only) */ 3052 if (pcbddc->NullSpace) { 3053 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3054 if (isbddc) { 3055 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3056 } else { 3057 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3058 } 3059 } 3060 3061 /* set operators */ 3062 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3063 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3064 3065 /* additional KSP customization */ 3066 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3067 if (max_it < 5) { 3068 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3069 } 3070 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3071 3072 3073 /* print some info if requested */ 3074 if (pcbddc->dbg_flag) { 3075 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3076 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3077 if (!multilevel_allowed) { 3078 if (multilevel_requested) { 3079 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3080 } else if (pcbddc->max_levels) { 3081 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3082 } 3083 } 3084 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3086 } 3087 3088 /* setup coarse ksp */ 3089 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3090 if (pcbddc->dbg_flag) { 3091 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3092 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3093 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3094 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3095 } 3096 3097 /* Check coarse problem if requested */ 3098 if (pcbddc->dbg_flag) { 3099 KSP check_ksp; 3100 KSPType check_ksp_type; 3101 PC check_pc; 3102 Vec check_vec; 3103 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3104 PetscInt its; 3105 PetscBool ispreonly,compute; 3106 3107 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3108 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3109 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3110 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3111 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3112 if (ispreonly) { 3113 check_ksp_type = KSPPREONLY; 3114 compute = PETSC_FALSE; 3115 } else { 3116 if (issym) check_ksp_type = KSPCG; 3117 else check_ksp_type = KSPGMRES; 3118 compute = PETSC_TRUE; 3119 } 3120 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3121 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3122 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3123 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3124 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3125 /* create random vec */ 3126 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3127 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3128 if (CoarseNullSpace) { 3129 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3130 } 3131 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3132 /* solve coarse problem */ 3133 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3134 if (CoarseNullSpace) { 3135 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3136 } 3137 /* check coarse problem residual error */ 3138 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3139 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3140 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3141 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3142 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3143 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3144 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3146 /* get eigenvalue estimation if preonly has not been requested */ 3147 if (!ispreonly) { 3148 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3149 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3150 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3151 } 3152 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3153 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3154 } 3155 /* free memory */ 3156 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3157 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3158 PetscFunctionReturn(0); 3159 } 3160 3161 #undef __FUNCT__ 3162 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3163 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3164 { 3165 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3166 PC_IS* pcis = (PC_IS*)pc->data; 3167 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3168 PetscInt i,j,coarse_size; 3169 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3170 PetscErrorCode ierr; 3171 3172 PetscFunctionBegin; 3173 /* get indices in local ordering for vertices and constraints */ 3174 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3175 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3176 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3177 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3178 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3179 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3180 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3181 3182 /* Compute global number of coarse dofs */ 3183 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3184 3185 /* check numbering */ 3186 if (pcbddc->dbg_flag) { 3187 PetscScalar coarsesum,*array; 3188 3189 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3190 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3191 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3192 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3193 for (i=0;i<pcbddc->local_primal_size;i++) { 3194 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3195 } 3196 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3197 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3198 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3199 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3200 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3201 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3202 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3203 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3204 for (i=0;i<pcis->n;i++) { 3205 if (array[i] == 1.0) { 3206 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3207 } 3208 } 3209 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3210 for (i=0;i<pcis->n;i++) { 3211 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3212 } 3213 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3214 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3215 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3216 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3217 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3218 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3219 if (pcbddc->dbg_flag > 1) { 3220 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3221 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3222 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3223 for (i=0;i<pcbddc->local_primal_size;i++) { 3224 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]); 3225 } 3226 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3227 } 3228 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3229 } 3230 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3231 /* get back data */ 3232 *coarse_size_n = coarse_size; 3233 *local_primal_indices_n = local_primal_indices; 3234 PetscFunctionReturn(0); 3235 } 3236 3237