1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 111 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 112 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 118 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 119 { 120 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 121 PC_IS *pcis = (PC_IS*)pc->data; 122 VecType impVecType; 123 PetscInt n_constraints,n_R,old_size; 124 PetscErrorCode ierr; 125 126 PetscFunctionBegin; 127 if (!pcbddc->ConstraintMatrix) { 128 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 129 } 130 /* get sizes */ 131 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 132 n_R = pcis->n-pcbddc->n_actual_vertices; 133 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 134 /* local work vectors (try to avoid unneeded work)*/ 135 /* R nodes */ 136 old_size = -1; 137 if (pcbddc->vec1_R) { 138 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 139 } 140 if (n_R != old_size) { 141 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 143 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 144 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 145 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 146 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 147 } 148 /* local primal dofs */ 149 old_size = -1; 150 if (pcbddc->vec1_P) { 151 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 152 } 153 if (pcbddc->local_primal_size != old_size) { 154 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 156 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 157 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 158 } 159 /* local explicit constraints */ 160 old_size = -1; 161 if (pcbddc->vec1_C) { 162 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 163 } 164 if (n_constraints && n_constraints != old_size) { 165 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 167 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 168 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "PCBDDCSetUpCorrection" 175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 176 { 177 PetscErrorCode ierr; 178 /* pointers to pcis and pcbddc */ 179 PC_IS* pcis = (PC_IS*)pc->data; 180 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 181 /* submatrices of local problem */ 182 Mat A_RV,A_VR,A_VV; 183 /* working matrices */ 184 Mat M1,M2,M3,C_CR; 185 /* working vectors */ 186 Vec vec1_C,vec2_C,vec1_V,vec2_V; 187 /* additional working stuff */ 188 IS is_aux; 189 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 190 const PetscScalar *array,*row_cmat_values; 191 const PetscInt *row_cmat_indices,*idx_R_local; 192 PetscInt *idx_V_B,*auxindices; 193 PetscInt n_vertices,n_constraints,size_of_constraint; 194 PetscInt i,j,n_R,n_D,n_B; 195 PetscBool unsymmetric_check; 196 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 197 MatType impMatType; 198 /* some shortcuts to scalars */ 199 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 200 /* for debugging purposes */ 201 PetscReal *coarsefunctions_errors,*constraints_errors; 202 203 PetscFunctionBegin; 204 /* get number of vertices (corners plus constraints with change of basis) 205 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 206 n_vertices = pcbddc->n_actual_vertices; 207 n_constraints = pcbddc->local_primal_size-n_vertices; 208 /* Set Non-overlapping dimensions */ 209 n_B = pcis->n_B; n_D = pcis->n - n_B; 210 n_R = pcis->n-n_vertices; 211 212 /* Set types for local objects needed by BDDC precondtioner */ 213 impMatType = MATSEQDENSE; 214 215 /* Allocating some extra storage just to be safe */ 216 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 217 for (i=0;i<pcis->n;i++) auxindices[i]=i; 218 219 /* vertices in boundary numbering */ 220 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 221 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 222 if (i != n_vertices) { 223 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 224 } 225 226 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 227 if (n_constraints) { 228 /* see if we can save some allocations */ 229 if (pcbddc->local_auxmat2) { 230 PetscInt on_R,on_constraints; 231 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 232 if (on_R != n_R || on_constraints != n_constraints) { 233 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 234 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 235 } 236 } 237 /* work vectors */ 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 239 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 240 /* auxiliary matrices */ 241 if (!pcbddc->local_auxmat2) { 242 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 243 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 244 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 245 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 246 } 247 248 /* Extract constraints on R nodes: C_{CR} */ 249 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 250 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 251 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 252 253 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 254 for (i=0;i<n_constraints;i++) { 255 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 256 /* Get row of constraint matrix in R numbering */ 257 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 259 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 260 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 261 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 262 /* Solve for row of constraint matrix in R numbering */ 263 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 264 /* Set values in local_auxmat2 */ 265 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 267 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 268 } 269 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 272 273 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 274 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 275 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 276 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 277 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 278 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 279 ierr = MatSetUp(M1);CHKERRQ(ierr); 280 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 281 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 282 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 283 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 284 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 285 ierr = MatDestroy(&M2);CHKERRQ(ierr); 286 ierr = MatDestroy(&M3);CHKERRQ(ierr); 287 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 288 if (!pcbddc->local_auxmat1) { 289 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } else { 291 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 292 } 293 } 294 295 /* Get submatrices from subdomain matrix */ 296 if (n_vertices) { 297 PetscInt ibs,mbs; 298 PetscBool issbaij; 299 Mat newmat; 300 301 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 302 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 303 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 304 if (ibs != mbs) { /* need to convert to SEQAIJ */ 305 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 308 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 309 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 310 } else { 311 /* this is safe */ 312 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 313 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 314 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 315 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 316 /* which of the two approaches is faster? */ 317 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 319 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 320 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 321 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 322 } else { 323 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 324 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 325 } 326 } 327 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 328 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 329 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 330 } 331 332 /* Matrix of coarse basis functions (local) */ 333 if (pcbddc->coarse_phi_B) { 334 PetscInt on_B,on_primal; 335 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 336 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 337 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 338 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 339 } 340 } 341 if (pcbddc->coarse_phi_D) { 342 PetscInt on_D,on_primal; 343 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 344 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 345 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 346 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 347 } 348 } 349 if (!pcbddc->coarse_phi_B) { 350 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 351 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 352 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 353 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 } 355 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 356 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 357 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 358 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 359 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 } 361 362 if (pcbddc->dbg_flag) { 363 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 364 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 365 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 366 } 367 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 368 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 369 370 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 371 372 /* vertices */ 373 for (i=0;i<n_vertices;i++) { 374 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 375 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 376 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 377 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 378 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 379 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 380 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 381 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 383 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 384 if (n_constraints) { 385 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 386 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 387 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 388 } 389 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 390 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 391 392 /* Set values in coarse basis function and subdomain part of coarse_mat */ 393 /* coarse basis functions */ 394 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 395 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 399 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 400 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 401 if (pcbddc->switch_static || pcbddc->dbg_flag) { 402 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 406 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 407 } 408 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 409 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 412 if (n_constraints) { 413 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 416 } 417 418 /* check */ 419 if (pcbddc->dbg_flag) { 420 /* assemble subdomain vector on local nodes */ 421 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 422 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 425 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 426 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 427 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 428 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 429 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 430 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 433 if (n_constraints) { 434 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 437 } 438 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 441 /* check saddle point solution */ 442 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 444 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 445 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 446 /* shift by the identity matrix */ 447 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 448 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 451 } 452 } 453 454 /* constraints */ 455 for (i=0;i<n_constraints;i++) { 456 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 457 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 458 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 459 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 460 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 461 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 462 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 463 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 464 if (n_vertices) { 465 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 466 } 467 /* Set values in coarse basis function and subdomain part of coarse_mat */ 468 /* coarse basis functions */ 469 j = i+n_vertices; /* don't touch this! */ 470 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 471 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 473 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 475 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 476 if (pcbddc->switch_static || pcbddc->dbg_flag) { 477 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 481 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 482 } 483 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 484 if (n_vertices) { 485 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 487 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 488 } 489 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 492 493 if (pcbddc->dbg_flag) { 494 /* assemble subdomain vector on nodes */ 495 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 496 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 498 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 499 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 500 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 501 /* assemble subdomain vector of lagrange multipliers */ 502 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 503 if (n_vertices) { 504 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 506 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 507 } 508 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 510 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 511 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 513 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 514 /* check saddle point solution */ 515 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 517 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 518 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 519 /* shift by the identity matrix */ 520 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 521 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 523 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 524 } 525 } 526 /* call assembling routines for local coarse basis */ 527 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 if (pcbddc->switch_static || pcbddc->dbg_flag) { 530 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 } 533 534 /* compute other basis functions for non-symmetric problems */ 535 ierr = MatIsSymmetric(pc->pmat,1.e-4,&pcbddc->issym);CHKERRQ(ierr); 536 if (!pcbddc->issym) { 537 if (!pcbddc->coarse_psi_B) { 538 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 539 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 541 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 542 } 543 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 544 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 545 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 546 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 547 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 548 } 549 for (i=0;i<pcbddc->local_primal_size;i++) { 550 if (n_constraints) { 551 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 552 for (j=0;j<n_constraints;j++) { 553 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 554 } 555 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 556 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 557 } 558 if (i<n_vertices) { 559 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 560 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 561 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 562 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 563 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 564 if (n_constraints) { 565 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 566 } 567 } else { 568 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 569 } 570 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 571 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 572 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 574 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 575 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 576 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 577 if (i<n_vertices) { 578 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 579 } 580 if (pcbddc->switch_static || pcbddc->dbg_flag) { 581 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 583 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 584 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 585 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 586 } 587 588 if (pcbddc->dbg_flag) { 589 /* assemble subdomain vector on nodes */ 590 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 591 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 592 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 593 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 594 if (i<n_vertices) { 595 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 596 } 597 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 598 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 599 /* assemble subdomain vector of lagrange multipliers */ 600 for (j=0;j<pcbddc->local_primal_size;j++) { 601 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 602 } 603 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 604 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 605 /* check saddle point solution */ 606 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 608 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 609 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 610 /* shift by the identity matrix */ 611 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 612 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 614 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 615 } 616 } 617 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 if (pcbddc->switch_static || pcbddc->dbg_flag) { 620 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 622 } 623 unsymmetric_check = PETSC_TRUE; 624 } else { /* take references to already computed coarse basis */ 625 unsymmetric_check = PETSC_FALSE; 626 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 627 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 628 if (pcbddc->coarse_phi_D) { 629 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 630 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 631 } 632 } 633 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 634 /* Checking coarse_sub_mat and coarse basis functios */ 635 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 636 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 637 if (pcbddc->dbg_flag) { 638 Mat coarse_sub_mat; 639 Mat AUXMAT,TM1,TM2,TM3,TM4; 640 Mat coarse_phi_D,coarse_phi_B; 641 Mat coarse_psi_D,coarse_psi_B; 642 Mat A_II,A_BB,A_IB,A_BI; 643 MatType checkmattype=MATSEQAIJ; 644 PetscReal real_value; 645 646 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 648 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 649 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 650 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 651 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 652 if (unsymmetric_check) { 653 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 654 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 655 } 656 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 657 658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 660 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 661 if (unsymmetric_check) { 662 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 663 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 664 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 665 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 666 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 667 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 668 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 669 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 670 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 671 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 672 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 674 } else { 675 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 676 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 677 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 } 684 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 686 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 687 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 688 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 689 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 695 for (i=0;i<pcbddc->local_primal_size;i++) { 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 697 } 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 699 for (i=0;i<pcbddc->local_primal_size;i++) { 700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 701 } 702 if (unsymmetric_check) { 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 704 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 706 } 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 708 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 710 } 711 } 712 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 715 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 716 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 719 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 720 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 721 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 722 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 723 if (unsymmetric_check) { 724 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 725 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 726 } 727 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 728 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 729 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 730 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 731 } 732 /* free memory */ 733 if (n_vertices) { 734 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 735 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 737 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 738 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 739 } 740 if (n_constraints) { 741 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 742 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 743 ierr = MatDestroy(&M1);CHKERRQ(ierr); 744 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 745 } 746 ierr = PetscFree(auxindices);CHKERRQ(ierr); 747 /* get back data */ 748 *coarse_submat_vals_n = coarse_submat_vals; 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNCT__ 753 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 754 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 755 { 756 PC_IS* pcis = (PC_IS*)(pc->data); 757 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 758 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 759 PetscBool issbaij,isseqaij; 760 /* manage repeated solves */ 761 MatReuse reuse; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 766 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 767 } 768 /* get mat flags */ 769 reuse = MAT_INITIAL_MATRIX; 770 if (pc->setupcalled) { 771 if (pc->flag == SAME_NONZERO_PATTERN) { 772 reuse = MAT_REUSE_MATRIX; 773 } else { 774 reuse = MAT_INITIAL_MATRIX; 775 } 776 } 777 if (reuse == MAT_INITIAL_MATRIX) { 778 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 779 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 780 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 781 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 783 } 784 785 /* transform local matrices if needed */ 786 if (pcbddc->ChangeOfBasisMatrix) { 787 Mat change_mat_all; 788 PetscScalar *row_cmat_values; 789 PetscInt *row_cmat_indices; 790 PetscInt *nnz,*is_indices,*temp_indices; 791 PetscInt i,j,k,n_D,n_B; 792 793 /* Get Non-overlapping dimensions */ 794 n_B = pcis->n_B; 795 n_D = pcis->n-n_B; 796 797 /* compute nonzero structure of change of basis on all local nodes */ 798 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 799 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 800 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 801 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 802 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 803 k=1; 804 for (i=0;i<n_B;i++) { 805 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 806 nnz[is_indices[i]]=j; 807 if (k < j) k = j; 808 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 809 } 810 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 811 /* assemble change of basis matrix on the whole set of local dofs */ 812 ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr); 813 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 814 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 815 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 816 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 818 for (i=0;i<n_D;i++) { 819 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 822 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 823 for (i=0;i<n_B;i++) { 824 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 825 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 826 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 827 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 828 } 829 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 831 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 832 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 833 if (isseqaij) { 834 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 835 } else { 836 Mat work_mat; 837 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 838 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 839 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 840 } 841 /* 842 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 843 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 844 */ 845 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 846 ierr = PetscFree(nnz);CHKERRQ(ierr); 847 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 848 } else { 849 /* without change of basis, the local matrix is unchanged */ 850 if (!pcbddc->local_mat) { 851 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 852 pcbddc->local_mat = matis->A; 853 } 854 } 855 856 /* get submatrices */ 857 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 858 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 859 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 860 if (!issbaij) { 861 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 862 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 863 } else { 864 Mat newmat; 865 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 866 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 867 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 868 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 869 } 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 875 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 876 { 877 PC_IS* pcis = (PC_IS*)(pc->data); 878 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 879 IS is_aux1,is_aux2; 880 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 881 PetscInt n_vertices,i,j,n_R,n_D,n_B; 882 PetscInt vbs,bs; 883 PetscBT bitmask; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 /* 888 No need to setup local scatters if 889 - primal space is unchanged 890 AND 891 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 892 AND 893 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 894 */ 895 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 896 PetscFunctionReturn(0); 897 } 898 /* destroy old objects */ 899 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 900 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 901 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 902 /* Set Non-overlapping dimensions */ 903 n_B = pcis->n_B; n_D = pcis->n - n_B; 904 n_vertices = pcbddc->n_actual_vertices; 905 /* create auxiliary bitmask */ 906 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 907 for (i=0;i<n_vertices;i++) { 908 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 909 } 910 911 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 912 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 913 for (i=0, n_R=0; i<pcis->n; i++) { 914 if (!PetscBTLookup(bitmask,i)) { 915 idx_R_local[n_R] = i; 916 n_R++; 917 } 918 } 919 920 /* Block code */ 921 vbs = 1; 922 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 923 if (bs>1 && !(n_vertices%bs)) { 924 PetscBool is_blocked = PETSC_TRUE; 925 PetscInt *vary; 926 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 927 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 928 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 929 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 930 for (i=0; i<n_vertices; i++) { 931 if (vary[i]!=0 && vary[i]!=bs) { 932 is_blocked = PETSC_FALSE; 933 break; 934 } 935 } 936 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 937 vbs = bs; 938 for (i=0;i<n_R/vbs;i++) { 939 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 940 } 941 } 942 ierr = PetscFree(vary);CHKERRQ(ierr); 943 } 944 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 945 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 946 947 /* print some info if requested */ 948 if (pcbddc->dbg_flag) { 949 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 951 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 952 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 956 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 957 } 958 959 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 960 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 961 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 962 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 963 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 964 for (i=0; i<n_D; i++) { 965 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 966 } 967 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 968 for (i=0, j=0; i<n_R; i++) { 969 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 970 aux_array1[j++] = i; 971 } 972 } 973 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 974 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 975 for (i=0, j=0; i<n_B; i++) { 976 if (!PetscBTLookup(bitmask,is_indices[i])) { 977 aux_array2[j++] = i; 978 } 979 } 980 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 981 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 982 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 983 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 984 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 985 986 if (pcbddc->switch_static || pcbddc->dbg_flag) { 987 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 988 for (i=0, j=0; i<n_R; i++) { 989 if (PetscBTLookup(bitmask,idx_R_local[i])) { 990 aux_array1[j++] = i; 991 } 992 } 993 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 994 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 995 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 996 } 997 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 998 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1005 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1006 { 1007 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1008 PC_IS *pcis = (PC_IS*)pc->data; 1009 PC pc_temp; 1010 Mat A_RR; 1011 MatReuse reuse; 1012 PetscScalar m_one = -1.0; 1013 PetscReal value; 1014 PetscInt n_D,n_R,ibs,mbs; 1015 PetscBool use_exact,use_exact_reduced,issbaij; 1016 PetscErrorCode ierr; 1017 /* prefixes stuff */ 1018 char dir_prefix[256],neu_prefix[256],str_level[16]; 1019 size_t len; 1020 1021 PetscFunctionBegin; 1022 1023 /* compute prefixes */ 1024 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1025 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1026 if (!pcbddc->current_level) { 1027 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1028 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1029 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1030 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1031 } else { 1032 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1033 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1034 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1035 len -= 15; /* remove "pc_bddc_coarse_" */ 1036 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1037 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1038 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1039 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1040 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1041 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1042 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1043 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1044 } 1045 1046 /* DIRICHLET PROBLEM */ 1047 /* Matrix for Dirichlet problem is pcis->A_II */ 1048 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1049 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1050 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1051 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1052 /* default */ 1053 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1054 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1055 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1056 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1057 if (issbaij) { 1058 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1059 } else { 1060 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1061 } 1062 /* Allow user's customization */ 1063 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1064 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1065 } 1066 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1067 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1068 if (!n_D) { 1069 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1070 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1071 } 1072 /* Set Up KSP for Dirichlet problem of BDDC */ 1073 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1074 /* set ksp_D into pcis data */ 1075 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1076 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1077 pcis->ksp_D = pcbddc->ksp_D; 1078 1079 /* NEUMANN PROBLEM */ 1080 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1081 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1082 if (pcbddc->ksp_R) { /* already created ksp */ 1083 PetscInt nn_R; 1084 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1085 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1086 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1087 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1088 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1089 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1090 reuse = MAT_INITIAL_MATRIX; 1091 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1092 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1093 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1094 reuse = MAT_INITIAL_MATRIX; 1095 } else { /* safe to reuse the matrix */ 1096 reuse = MAT_REUSE_MATRIX; 1097 } 1098 } 1099 /* last check */ 1100 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1101 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1102 reuse = MAT_INITIAL_MATRIX; 1103 } 1104 } else { /* first time, so we need to create the matrix */ 1105 reuse = MAT_INITIAL_MATRIX; 1106 } 1107 /* extract A_RR */ 1108 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1109 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1110 if (ibs != mbs) { 1111 Mat newmat; 1112 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1113 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1114 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1115 } else { 1116 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1117 } 1118 if (!pcbddc->ksp_R) { /* create object if not present */ 1119 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1120 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1121 /* default */ 1122 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1123 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1124 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1125 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1126 if (issbaij) { 1127 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1128 } else { 1129 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1130 } 1131 /* Allow user's customization */ 1132 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1133 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1134 } 1135 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1136 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1137 if (!n_R) { 1138 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1139 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1140 } 1141 /* Set Up KSP for Neumann problem of BDDC */ 1142 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1143 1144 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1145 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1146 /* Dirichlet */ 1147 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1148 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1149 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1150 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1151 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1152 /* need to be adapted? */ 1153 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1154 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1155 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1156 /* print info */ 1157 if (pcbddc->dbg_flag) { 1158 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1159 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1162 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1163 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1164 } 1165 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1166 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1167 } 1168 1169 /* Neumann */ 1170 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1171 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1172 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1173 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1174 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1175 /* need to be adapted? */ 1176 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1177 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1178 /* print info */ 1179 if (pcbddc->dbg_flag) { 1180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1182 } 1183 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1184 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1185 } 1186 } 1187 /* free Neumann problem's matrix */ 1188 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1194 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1195 { 1196 PetscErrorCode ierr; 1197 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1198 1199 PetscFunctionBegin; 1200 if (applytranspose) { 1201 if (pcbddc->local_auxmat1) { 1202 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1203 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1204 } 1205 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1206 } else { 1207 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1208 if (pcbddc->local_auxmat1) { 1209 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1210 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1211 } 1212 } 1213 PetscFunctionReturn(0); 1214 } 1215 1216 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1219 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1220 { 1221 PetscErrorCode ierr; 1222 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1223 PC_IS* pcis = (PC_IS*) (pc->data); 1224 const PetscScalar zero = 0.0; 1225 1226 PetscFunctionBegin; 1227 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1228 if (applytranspose) { 1229 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1230 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1231 } else { 1232 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1233 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1234 } 1235 /* start communications from local primal nodes to rhs of coarse solver */ 1236 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1237 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 1240 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1241 /* TODO remove null space when doing multilevel */ 1242 if (pcbddc->coarse_ksp) { 1243 if (applytranspose) { 1244 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1245 } else { 1246 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1247 } 1248 } 1249 /* start communications from coarse solver solution to local primal nodes */ 1250 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1251 1252 /* Local solution on R nodes */ 1253 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1254 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1255 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1256 if (pcbddc->switch_static) { 1257 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1258 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1259 } 1260 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1261 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1262 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1263 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 if (pcbddc->switch_static) { 1265 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 } 1268 1269 /* complete communications from coarse sol to local primal nodes */ 1270 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1271 1272 /* Sum contributions from two levels */ 1273 if (applytranspose) { 1274 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1275 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1276 } else { 1277 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1278 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1279 } 1280 PetscFunctionReturn(0); 1281 } 1282 1283 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1286 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1287 { 1288 PetscErrorCode ierr; 1289 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1290 PetscScalar *array,*array2; 1291 Vec from,to; 1292 1293 PetscFunctionBegin; 1294 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1295 from = pcbddc->coarse_vec; 1296 to = pcbddc->vec1_P; 1297 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1298 Vec tvec; 1299 PetscInt lsize; 1300 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1301 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1302 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1303 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1304 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1305 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1306 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1307 } 1308 } else { /* from local to global -> put data in coarse right hand side */ 1309 from = pcbddc->vec1_P; 1310 to = pcbddc->coarse_vec; 1311 } 1312 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1318 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1319 { 1320 PetscErrorCode ierr; 1321 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1322 PetscScalar *array,*array2; 1323 Vec from,to; 1324 1325 PetscFunctionBegin; 1326 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1327 from = pcbddc->coarse_vec; 1328 to = pcbddc->vec1_P; 1329 } else { /* from local to global -> put data in coarse right hand side */ 1330 from = pcbddc->vec1_P; 1331 to = pcbddc->coarse_vec; 1332 } 1333 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1334 if (smode == SCATTER_FORWARD) { 1335 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1336 Vec tvec; 1337 PetscInt lsize; 1338 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1339 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1340 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1341 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1342 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1343 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1344 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1345 } 1346 } 1347 PetscFunctionReturn(0); 1348 } 1349 1350 /* uncomment for testing purposes */ 1351 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1354 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1355 { 1356 PetscErrorCode ierr; 1357 PC_IS* pcis = (PC_IS*)(pc->data); 1358 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1359 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1360 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1361 MatType impMatType=MATSEQAIJ; 1362 /* one and zero */ 1363 PetscScalar one=1.0,zero=0.0; 1364 /* space to store constraints and their local indices */ 1365 PetscScalar *temp_quadrature_constraint; 1366 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1367 /* iterators */ 1368 PetscInt i,j,k,total_counts,temp_start_ptr; 1369 /* stuff to store connected components stored in pcbddc->mat_graph */ 1370 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1371 PetscInt n_ISForFaces,n_ISForEdges; 1372 /* near null space stuff */ 1373 MatNullSpace nearnullsp; 1374 const Vec *nearnullvecs; 1375 Vec *localnearnullsp; 1376 PetscBool nnsp_has_cnst; 1377 PetscInt nnsp_size; 1378 PetscScalar *array; 1379 /* BLAS integers */ 1380 PetscBLASInt lwork,lierr; 1381 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1382 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1383 /* LAPACK working arrays for SVD or POD */ 1384 PetscBool skip_lapack; 1385 PetscScalar *work; 1386 PetscReal *singular_vals; 1387 #if defined(PETSC_USE_COMPLEX) 1388 PetscReal *rwork; 1389 #endif 1390 #if defined(PETSC_MISSING_LAPACK_GESVD) 1391 PetscBLASInt Blas_one_2=1; 1392 PetscScalar *temp_basis,*correlation_mat; 1393 #else 1394 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1395 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1396 #endif 1397 /* reuse */ 1398 PetscInt olocal_primal_size; 1399 PetscInt *oprimal_indices_local_idxs; 1400 /* change of basis */ 1401 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1402 PetscBool boolforchange,qr_needed; 1403 PetscBT touched,change_basis,qr_needed_idx; 1404 /* auxiliary stuff */ 1405 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1406 /* some quantities */ 1407 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1408 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1409 1410 1411 PetscFunctionBegin; 1412 /* Destroy Mat objects computed previously */ 1413 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1414 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1415 /* Get index sets for faces, edges and vertices from graph */ 1416 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1417 pcbddc->use_vertices = PETSC_TRUE; 1418 } 1419 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1420 /* HACKS (the two following code branches) */ 1421 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1422 pcbddc->use_change_of_basis = PETSC_TRUE; 1423 pcbddc->use_change_on_faces = PETSC_FALSE; 1424 } 1425 if (pcbddc->NullSpace) { 1426 /* use_change_of_basis should be consistent among processors */ 1427 PetscBool tbool = pcbddc->use_change_of_basis; 1428 ierr = MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1429 } 1430 /* print some info */ 1431 if (pcbddc->dbg_flag) { 1432 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1433 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1434 i = 0; 1435 if (ISForVertices) { 1436 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1437 } 1438 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1439 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1440 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1441 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1442 } 1443 /* check if near null space is attached to global mat */ 1444 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1445 if (nearnullsp) { 1446 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1447 /* remove any stored info */ 1448 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1449 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1450 /* store information for BDDC solver reuse */ 1451 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1452 pcbddc->onearnullspace = nearnullsp; 1453 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1454 for (i=0;i<nnsp_size;i++) { 1455 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1456 } 1457 } else { /* if near null space is not provided BDDC uses constants by default */ 1458 nnsp_size = 0; 1459 nnsp_has_cnst = PETSC_TRUE; 1460 } 1461 /* get max number of constraints on a single cc */ 1462 max_constraints = nnsp_size; 1463 if (nnsp_has_cnst) max_constraints++; 1464 1465 /* 1466 Evaluate maximum storage size needed by the procedure 1467 - temp_indices will contain start index of each constraint stored as follows 1468 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1469 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1470 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1471 */ 1472 total_counts = n_ISForFaces+n_ISForEdges; 1473 total_counts *= max_constraints; 1474 n_vertices = 0; 1475 if (ISForVertices) { 1476 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1477 } 1478 total_counts += n_vertices; 1479 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1480 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1481 total_counts = 0; 1482 max_size_of_constraint = 0; 1483 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1484 if (i<n_ISForEdges) { 1485 used_IS = &ISForEdges[i]; 1486 } else { 1487 used_IS = &ISForFaces[i-n_ISForEdges]; 1488 } 1489 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1490 total_counts += j; 1491 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1492 } 1493 total_counts *= max_constraints; 1494 total_counts += n_vertices; 1495 ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr); 1496 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr); 1497 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1498 /* get local part of global near null space vectors */ 1499 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1500 for (k=0;k<nnsp_size;k++) { 1501 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1502 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1504 } 1505 1506 /* whether or not to skip lapack calls */ 1507 skip_lapack = PETSC_TRUE; 1508 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1509 1510 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1511 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1512 PetscScalar temp_work; 1513 #if defined(PETSC_MISSING_LAPACK_GESVD) 1514 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1515 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1516 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1517 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1518 #if defined(PETSC_USE_COMPLEX) 1519 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1520 #endif 1521 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1522 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1523 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1524 lwork = -1; 1525 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1526 #if !defined(PETSC_USE_COMPLEX) 1527 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1528 #else 1529 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1530 #endif 1531 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1532 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1533 #else /* on missing GESVD */ 1534 /* SVD */ 1535 PetscInt max_n,min_n; 1536 max_n = max_size_of_constraint; 1537 min_n = max_constraints; 1538 if (max_size_of_constraint < max_constraints) { 1539 min_n = max_size_of_constraint; 1540 max_n = max_constraints; 1541 } 1542 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1543 #if defined(PETSC_USE_COMPLEX) 1544 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1545 #endif 1546 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1547 lwork = -1; 1548 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1549 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1550 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1551 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1552 #if !defined(PETSC_USE_COMPLEX) 1553 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1554 #else 1555 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1556 #endif 1557 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1558 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1559 #endif /* on missing GESVD */ 1560 /* Allocate optimal workspace */ 1561 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1562 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1563 } 1564 /* Now we can loop on constraining sets */ 1565 total_counts = 0; 1566 temp_indices[0] = 0; 1567 /* vertices */ 1568 if (ISForVertices) { 1569 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1570 if (nnsp_has_cnst) { /* consider all vertices */ 1571 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1572 for (i=0;i<n_vertices;i++) { 1573 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1574 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1575 total_counts++; 1576 } 1577 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1578 PetscBool used_vertex; 1579 for (i=0;i<n_vertices;i++) { 1580 used_vertex = PETSC_FALSE; 1581 k = 0; 1582 while (!used_vertex && k<nnsp_size) { 1583 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1584 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1585 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1586 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1587 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1588 total_counts++; 1589 used_vertex = PETSC_TRUE; 1590 } 1591 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1592 k++; 1593 } 1594 } 1595 } 1596 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1597 n_vertices = total_counts; 1598 } 1599 1600 /* edges and faces */ 1601 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1602 if (i<n_ISForEdges) { 1603 used_IS = &ISForEdges[i]; 1604 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1605 } else { 1606 used_IS = &ISForFaces[i-n_ISForEdges]; 1607 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1608 } 1609 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1610 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1611 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1612 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1613 /* change of basis should not be performed on local periodic nodes */ 1614 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1615 if (nnsp_has_cnst) { 1616 PetscScalar quad_value; 1617 temp_constraints++; 1618 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1619 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1620 for (j=0;j<size_of_constraint;j++) { 1621 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1622 } 1623 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1624 total_counts++; 1625 } 1626 for (k=0;k<nnsp_size;k++) { 1627 PetscReal real_value; 1628 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1629 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1630 for (j=0;j<size_of_constraint;j++) { 1631 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1632 } 1633 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1634 /* check if array is null on the connected component */ 1635 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1636 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1637 if (real_value > 0.0) { /* keep indices and values */ 1638 temp_constraints++; 1639 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1640 total_counts++; 1641 } 1642 } 1643 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1644 valid_constraints = temp_constraints; 1645 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1646 if (!pcbddc->use_nnsp_true && temp_constraints) { 1647 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1648 1649 #if defined(PETSC_MISSING_LAPACK_GESVD) 1650 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1651 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1652 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1653 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1654 from that computed using LAPACKgesvd 1655 -> This is due to a different computation of eigenvectors in LAPACKheev 1656 -> The quality of the POD-computed basis will be the same */ 1657 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1658 /* Store upper triangular part of correlation matrix */ 1659 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1660 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1661 for (j=0;j<temp_constraints;j++) { 1662 for (k=0;k<j+1;k++) { 1663 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1664 } 1665 } 1666 /* compute eigenvalues and eigenvectors of correlation matrix */ 1667 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1668 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1669 #if !defined(PETSC_USE_COMPLEX) 1670 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1671 #else 1672 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1673 #endif 1674 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1675 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1676 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1677 j=0; 1678 while (j < temp_constraints && singular_vals[j] < tol) j++; 1679 total_counts=total_counts-j; 1680 valid_constraints = temp_constraints-j; 1681 /* scale and copy POD basis into used quadrature memory */ 1682 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1683 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1684 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1685 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1686 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1687 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1688 if (j<temp_constraints) { 1689 PetscInt ii; 1690 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1691 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1692 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1693 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1694 for (k=0;k<temp_constraints-j;k++) { 1695 for (ii=0;ii<size_of_constraint;ii++) { 1696 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1697 } 1698 } 1699 } 1700 #else /* on missing GESVD */ 1701 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1702 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1703 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1704 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1705 #if !defined(PETSC_USE_COMPLEX) 1706 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1707 #else 1708 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1709 #endif 1710 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1711 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1712 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1713 k = temp_constraints; 1714 if (k > size_of_constraint) k = size_of_constraint; 1715 j = 0; 1716 while (j < k && singular_vals[k-j-1] < tol) j++; 1717 total_counts = total_counts-temp_constraints+k-j; 1718 valid_constraints = k-j; 1719 #endif /* on missing GESVD */ 1720 } 1721 /* setting change_of_basis flag is safe now */ 1722 if (boolforchange) { 1723 for (j=0;j<valid_constraints;j++) { 1724 PetscBTSet(change_basis,total_counts-j-1); 1725 } 1726 } 1727 } 1728 /* free index sets of faces, edges and vertices */ 1729 for (i=0;i<n_ISForFaces;i++) { 1730 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1731 } 1732 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1733 for (i=0;i<n_ISForEdges;i++) { 1734 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1735 } 1736 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1737 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1738 /* map temp_indices_to_constraint in boundary numbering */ 1739 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1740 if (i != temp_indices[total_counts]) { 1741 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1742 } 1743 1744 /* free workspace */ 1745 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1746 ierr = PetscFree(work);CHKERRQ(ierr); 1747 #if defined(PETSC_USE_COMPLEX) 1748 ierr = PetscFree(rwork);CHKERRQ(ierr); 1749 #endif 1750 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1751 #if defined(PETSC_MISSING_LAPACK_GESVD) 1752 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1753 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1754 #endif 1755 } 1756 for (k=0;k<nnsp_size;k++) { 1757 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1758 } 1759 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1760 1761 /* set quantities in pcbddc data structure and store previous primal size */ 1762 /* n_vertices defines the number of subdomain corners in the primal space */ 1763 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1764 olocal_primal_size = pcbddc->local_primal_size; 1765 pcbddc->local_primal_size = total_counts; 1766 pcbddc->n_vertices = n_vertices; 1767 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1768 1769 /* Create constraint matrix */ 1770 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1771 /* so we need to set it up properly either with or without change of basis */ 1772 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1773 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1774 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1775 /* array to compute a local numbering of constraints : vertices first then constraints */ 1776 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1777 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1778 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1779 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1780 /* auxiliary stuff for basis change */ 1781 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1782 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1783 1784 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1785 total_primal_vertices=0; 1786 for (i=0;i<pcbddc->local_primal_size;i++) { 1787 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1788 if (size_of_constraint == 1) { 1789 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1790 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1791 aux_primal_minloc[total_primal_vertices]=0; 1792 total_primal_vertices++; 1793 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1794 PetscInt min_loc,min_index; 1795 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1796 /* find first untouched local node */ 1797 k = 0; 1798 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1799 min_index = global_indices[k]; 1800 min_loc = k; 1801 /* search the minimum among global nodes already untouched on the cc */ 1802 for (k=1;k<size_of_constraint;k++) { 1803 /* there can be more than one constraint on a single connected component */ 1804 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1805 min_index = global_indices[k]; 1806 min_loc = k; 1807 } 1808 } 1809 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1810 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1811 aux_primal_minloc[total_primal_vertices]=min_loc; 1812 total_primal_vertices++; 1813 } 1814 } 1815 /* determine if a QR strategy is needed for change of basis */ 1816 qr_needed = PETSC_FALSE; 1817 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1818 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1819 if (PetscBTLookup(change_basis,i)) { 1820 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1821 j = 0; 1822 for (k=0;k<size_of_constraint;k++) { 1823 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1824 j++; 1825 } 1826 } 1827 /* found more than one primal dof on the cc */ 1828 if (j > 1) { 1829 PetscBTSet(qr_needed_idx,i); 1830 qr_needed = PETSC_TRUE; 1831 } 1832 } 1833 } 1834 /* free workspace */ 1835 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1836 1837 /* permute indices in order to have a sorted set of vertices */ 1838 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1839 1840 /* nonzero structure of constraint matrix */ 1841 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1842 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1843 j=total_primal_vertices; 1844 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1845 if (!PetscBTLookup(change_basis,i)) { 1846 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1847 j++; 1848 } 1849 } 1850 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1851 ierr = PetscFree(nnz);CHKERRQ(ierr); 1852 /* set values in constraint matrix */ 1853 for (i=0;i<total_primal_vertices;i++) { 1854 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1855 } 1856 total_counts = total_primal_vertices; 1857 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1858 if (!PetscBTLookup(change_basis,i)) { 1859 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1860 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1861 total_counts++; 1862 } 1863 } 1864 /* assembling */ 1865 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1866 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1867 /* 1868 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1869 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1870 */ 1871 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1872 if (pcbddc->use_change_of_basis) { 1873 /* dual and primal dofs on a single cc */ 1874 PetscInt dual_dofs,primal_dofs; 1875 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1876 PetscInt primal_counter; 1877 /* working stuff for GEQRF */ 1878 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1879 PetscBLASInt lqr_work; 1880 /* working stuff for UNGQR */ 1881 PetscScalar *gqr_work,lgqr_work_t; 1882 PetscBLASInt lgqr_work; 1883 /* working stuff for TRTRS */ 1884 PetscScalar *trs_rhs; 1885 PetscBLASInt Blas_NRHS; 1886 /* pointers for values insertion into change of basis matrix */ 1887 PetscInt *start_rows,*start_cols; 1888 PetscScalar *start_vals; 1889 /* working stuff for values insertion */ 1890 PetscBT is_primal; 1891 1892 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1893 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1894 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1895 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1896 /* work arrays */ 1897 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1898 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1899 /* nonzeros per row */ 1900 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1901 if (PetscBTLookup(change_basis,i)) { 1902 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1903 if (PetscBTLookup(qr_needed_idx,i)) { 1904 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1905 } else { 1906 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1907 /* get local primal index on the cc */ 1908 j = 0; 1909 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1910 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1911 } 1912 } 1913 } 1914 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1915 ierr = PetscFree(nnz);CHKERRQ(ierr); 1916 /* Set initial identity in the matrix */ 1917 for (i=0;i<pcis->n_B;i++) { 1918 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1919 } 1920 1921 if (pcbddc->dbg_flag) { 1922 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1923 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1924 } 1925 1926 1927 /* Now we loop on the constraints which need a change of basis */ 1928 /* 1929 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1930 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1931 1932 Basic blocks of change of basis matrix T computed 1933 1934 - Using the following block transformation if there is only a primal dof on the cc 1935 (in the example, primal dof is the last one of the edge in LOCAL ordering 1936 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1937 | 1 0 ... 0 1 | 1938 | 0 1 ... 0 1 | 1939 | ... | 1940 | 0 ... 1 1 | 1941 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1942 1943 - via QR decomposition of constraints otherwise 1944 */ 1945 if (qr_needed) { 1946 /* space to store Q */ 1947 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 1948 /* first we issue queries for optimal work */ 1949 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1950 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1951 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1952 lqr_work = -1; 1953 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1954 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1955 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1956 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 1957 lgqr_work = -1; 1958 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1959 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1960 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1961 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1962 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1963 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1964 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1965 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1966 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 1967 /* array to store scaling factors for reflectors */ 1968 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 1969 /* array to store rhs and solution of triangular solver */ 1970 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 1971 /* allocating workspace for check */ 1972 if (pcbddc->dbg_flag) { 1973 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 1974 } 1975 } 1976 /* array to store whether a node is primal or not */ 1977 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1978 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 1979 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1980 if (i != total_primal_vertices) { 1981 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1982 } 1983 for (i=0;i<total_primal_vertices;i++) { 1984 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1985 } 1986 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1987 1988 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1989 /* -> using implicit ordering contained in temp_indices data */ 1990 total_counts = pcbddc->n_vertices; 1991 primal_counter = total_counts; 1992 while (total_counts<pcbddc->local_primal_size) { 1993 primal_dofs = 1; 1994 if (PetscBTLookup(change_basis,total_counts)) { 1995 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1996 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1997 primal_dofs++; 1998 } 1999 /* get constraint info */ 2000 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2001 dual_dofs = size_of_constraint-primal_dofs; 2002 2003 if (pcbddc->dbg_flag) { 2004 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2005 } 2006 2007 if (primal_dofs > 1) { /* QR */ 2008 2009 /* copy quadrature constraints for change of basis check */ 2010 if (pcbddc->dbg_flag) { 2011 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2012 } 2013 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2014 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2015 2016 /* compute QR decomposition of constraints */ 2017 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2018 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2020 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2021 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2022 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2023 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2024 2025 /* explictly compute R^-T */ 2026 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2027 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2028 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2029 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2030 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2031 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2032 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2033 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2034 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2035 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2036 2037 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2038 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2039 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2040 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2041 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2042 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2043 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2044 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2045 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2046 2047 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2048 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2049 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2050 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2051 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2052 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2053 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2054 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2056 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2057 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2058 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2059 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2060 2061 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2062 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2063 /* insert cols for primal dofs */ 2064 for (j=0;j<primal_dofs;j++) { 2065 start_vals = &qr_basis[j*size_of_constraint]; 2066 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2067 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2068 } 2069 /* insert cols for dual dofs */ 2070 for (j=0,k=0;j<dual_dofs;k++) { 2071 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2072 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2073 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2074 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2075 j++; 2076 } 2077 } 2078 2079 /* check change of basis */ 2080 if (pcbddc->dbg_flag) { 2081 PetscInt ii,jj; 2082 PetscBool valid_qr=PETSC_TRUE; 2083 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2084 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2085 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2087 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2088 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2089 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2090 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2091 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2092 for (jj=0;jj<size_of_constraint;jj++) { 2093 for (ii=0;ii<primal_dofs;ii++) { 2094 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2095 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2096 } 2097 } 2098 if (!valid_qr) { 2099 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2100 for (jj=0;jj<size_of_constraint;jj++) { 2101 for (ii=0;ii<primal_dofs;ii++) { 2102 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2103 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2104 } 2105 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2106 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2107 } 2108 } 2109 } 2110 } else { 2111 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2112 } 2113 } 2114 } else { /* simple transformation block */ 2115 PetscInt row,col; 2116 PetscScalar val; 2117 for (j=0;j<size_of_constraint;j++) { 2118 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2119 if (!PetscBTLookup(is_primal,row)) { 2120 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2121 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2122 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2123 } else { 2124 for (k=0;k<size_of_constraint;k++) { 2125 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2126 if (row != col) { 2127 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2128 } else { 2129 val = 1.0; 2130 } 2131 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2132 } 2133 } 2134 } 2135 if (pcbddc->dbg_flag) { 2136 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2137 } 2138 } 2139 /* increment primal counter */ 2140 primal_counter += primal_dofs; 2141 } else { 2142 if (pcbddc->dbg_flag) { 2143 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2144 } 2145 } 2146 /* increment constraint counter total_counts */ 2147 total_counts += primal_dofs; 2148 } 2149 2150 /* free workspace */ 2151 if (qr_needed) { 2152 if (pcbddc->dbg_flag) { 2153 ierr = PetscFree(work);CHKERRQ(ierr); 2154 } 2155 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2156 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2157 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2158 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2159 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2160 } 2161 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2162 /* assembling */ 2163 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2164 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2165 /* 2166 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2167 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2168 */ 2169 } 2170 /* Change of basis as provided by the user in local numbering (internal and boundary) or boundary only */ 2171 if (pcbddc->user_ChangeOfBasisMatrix) { 2172 PetscInt rows,cols; 2173 ierr = MatGetSize(pcbddc->user_ChangeOfBasisMatrix,&rows,&cols);CHKERRQ(ierr); 2174 if (rows == pcis->n && cols == pcis->n) { 2175 ierr = MatGetSubMatrix(pcbddc->user_ChangeOfBasisMatrix,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2176 } else { 2177 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2178 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2179 } 2180 } 2181 2182 /* get indices in local ordering for vertices and constraints */ 2183 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2184 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2185 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2186 } 2187 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2188 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2189 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2190 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2191 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2192 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2193 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2194 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2195 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2196 /* set quantities in PCBDDC data struct */ 2197 pcbddc->n_actual_vertices = i; 2198 /* check if a new primal space has been introduced */ 2199 pcbddc->new_primal_space_local = PETSC_TRUE; 2200 if (olocal_primal_size == pcbddc->local_primal_size) { 2201 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2202 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2203 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2204 } 2205 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2206 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2207 2208 /* flush dbg viewer */ 2209 if (pcbddc->dbg_flag) { 2210 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2211 } 2212 2213 /* free workspace */ 2214 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2215 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2216 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2217 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2218 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2219 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2220 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2221 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2222 PetscFunctionReturn(0); 2223 } 2224 2225 #undef __FUNCT__ 2226 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2227 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2228 { 2229 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2230 PC_IS *pcis = (PC_IS*)pc->data; 2231 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2232 PetscInt ierr,i,vertex_size; 2233 PetscViewer viewer=pcbddc->dbg_viewer; 2234 2235 PetscFunctionBegin; 2236 /* Reset previously computed graph */ 2237 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2238 /* Init local Graph struct */ 2239 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2240 2241 /* Check validity of the csr graph passed in by the user */ 2242 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2243 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2244 } 2245 2246 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2247 if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) { 2248 Mat mat_adj; 2249 const PetscInt *xadj,*adjncy; 2250 PetscBool flg_row=PETSC_TRUE; 2251 2252 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2253 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2254 if (!flg_row) { 2255 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2256 } 2257 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2258 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2259 if (!flg_row) { 2260 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2261 } 2262 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2263 } 2264 2265 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2266 vertex_size = 1; 2267 if (pcbddc->user_provided_isfordofs) { 2268 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2269 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2270 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2271 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2272 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2273 } 2274 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2275 pcbddc->n_ISForDofs = 0; 2276 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2277 } 2278 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2279 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2280 } else { 2281 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2282 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2283 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2284 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2285 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2286 } 2287 } 2288 } 2289 2290 /* Setup of Graph */ 2291 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2292 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2293 } 2294 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2295 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2296 } 2297 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2298 2299 /* Graph's connected components analysis */ 2300 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2301 2302 /* print some info to stdout */ 2303 if (pcbddc->dbg_flag) { 2304 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2305 } 2306 2307 /* mark topography has done */ 2308 pcbddc->recompute_topography = PETSC_FALSE; 2309 PetscFunctionReturn(0); 2310 } 2311 2312 #undef __FUNCT__ 2313 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2314 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2315 { 2316 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2317 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2318 PetscErrorCode ierr; 2319 2320 PetscFunctionBegin; 2321 n = 0; 2322 vertices = 0; 2323 if (pcbddc->ConstraintMatrix) { 2324 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2325 for (i=0;i<local_primal_size;i++) { 2326 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2327 if (size_of_constraint == 1) n++; 2328 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2329 } 2330 if (vertices_idx) { 2331 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2332 n = 0; 2333 for (i=0;i<local_primal_size;i++) { 2334 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2335 if (size_of_constraint == 1) { 2336 vertices[n++]=row_cmat_indices[0]; 2337 } 2338 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2339 } 2340 } 2341 } 2342 *n_vertices = n; 2343 if (vertices_idx) *vertices_idx = vertices; 2344 PetscFunctionReturn(0); 2345 } 2346 2347 #undef __FUNCT__ 2348 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2349 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2350 { 2351 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2352 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2353 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2354 PetscBT touched; 2355 PetscErrorCode ierr; 2356 2357 /* This function assumes that the number of local constraints per connected component 2358 is not greater than the number of nodes defined for the connected component 2359 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2360 PetscFunctionBegin; 2361 n = 0; 2362 constraints_index = 0; 2363 if (pcbddc->ConstraintMatrix) { 2364 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2365 max_size_of_constraint = 0; 2366 for (i=0;i<local_primal_size;i++) { 2367 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2368 if (size_of_constraint > 1) { 2369 n++; 2370 } 2371 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2372 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2373 } 2374 if (constraints_idx) { 2375 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2376 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2377 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2378 n = 0; 2379 for (i=0;i<local_primal_size;i++) { 2380 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2381 if (size_of_constraint > 1) { 2382 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2383 /* find first untouched local node */ 2384 j = 0; 2385 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2386 min_index = row_cmat_global_indices[j]; 2387 min_loc = j; 2388 /* search the minimum among nodes not yet touched on the connected component 2389 since there can be more than one constraint on a single cc */ 2390 for (j=1;j<size_of_constraint;j++) { 2391 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2392 min_index = row_cmat_global_indices[j]; 2393 min_loc = j; 2394 } 2395 } 2396 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2397 constraints_index[n++] = row_cmat_indices[min_loc]; 2398 } 2399 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2400 } 2401 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2402 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2403 } 2404 } 2405 *n_constraints = n; 2406 if (constraints_idx) *constraints_idx = constraints_index; 2407 PetscFunctionReturn(0); 2408 } 2409 2410 /* the next two functions has been adapted from pcis.c */ 2411 #undef __FUNCT__ 2412 #define __FUNCT__ "PCBDDCApplySchur" 2413 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2414 { 2415 PetscErrorCode ierr; 2416 PC_IS *pcis = (PC_IS*)(pc->data); 2417 2418 PetscFunctionBegin; 2419 if (!vec2_B) { vec2_B = v; } 2420 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2421 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2422 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2423 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2424 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2425 PetscFunctionReturn(0); 2426 } 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2430 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2431 { 2432 PetscErrorCode ierr; 2433 PC_IS *pcis = (PC_IS*)(pc->data); 2434 2435 PetscFunctionBegin; 2436 if (!vec2_B) { vec2_B = v; } 2437 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2438 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2439 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2440 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2441 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2442 PetscFunctionReturn(0); 2443 } 2444 2445 #undef __FUNCT__ 2446 #define __FUNCT__ "PCBDDCSubsetNumbering" 2447 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2448 { 2449 Vec local_vec,global_vec; 2450 IS seqis,paris; 2451 VecScatter scatter_ctx; 2452 PetscScalar *array; 2453 PetscInt *temp_global_dofs; 2454 PetscScalar globalsum; 2455 PetscInt i,j,s; 2456 PetscInt nlocals,first_index,old_index,max_local; 2457 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2458 PetscMPIInt *dof_sizes,*dof_displs; 2459 PetscBool first_found; 2460 PetscErrorCode ierr; 2461 2462 PetscFunctionBegin; 2463 /* mpi buffers */ 2464 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2465 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2466 j = ( !rank_prec_comm ? size_prec_comm : 0); 2467 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2468 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2469 /* get maximum size of subset */ 2470 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2471 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2472 max_local = 0; 2473 for (i=0;i<n_local_dofs;i++) { 2474 if (max_local < temp_global_dofs[i] ) { 2475 max_local = temp_global_dofs[i]; 2476 } 2477 } 2478 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2479 max_global++; 2480 max_local = 0; 2481 for (i=0;i<n_local_dofs;i++) { 2482 if (max_local < local_dofs[i] ) { 2483 max_local = local_dofs[i]; 2484 } 2485 } 2486 max_local++; 2487 /* allocate workspace */ 2488 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2489 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2490 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2491 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2492 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2493 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2494 /* create scatter */ 2495 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2496 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2497 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2498 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2499 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2500 /* init array */ 2501 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2502 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2503 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2504 if (local_dofs_mult) { 2505 for (i=0;i<n_local_dofs;i++) { 2506 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2507 } 2508 } else { 2509 for (i=0;i<n_local_dofs;i++) { 2510 array[local_dofs[i]]=1.0; 2511 } 2512 } 2513 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2514 /* scatter into global vec and get total number of global dofs */ 2515 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2516 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2517 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2518 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2519 /* Fill global_vec with cumulative function for global numbering */ 2520 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2521 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2522 nlocals = 0; 2523 first_index = -1; 2524 first_found = PETSC_FALSE; 2525 for (i=0;i<s;i++) { 2526 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2527 first_found = PETSC_TRUE; 2528 first_index = i; 2529 } 2530 nlocals += (PetscInt)PetscRealPart(array[i]); 2531 } 2532 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2533 if (!rank_prec_comm) { 2534 dof_displs[0]=0; 2535 for (i=1;i<size_prec_comm;i++) { 2536 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2537 } 2538 } 2539 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2540 if (first_found) { 2541 array[first_index] += (PetscScalar)nlocals; 2542 old_index = first_index; 2543 for (i=first_index+1;i<s;i++) { 2544 if (PetscRealPart(array[i]) > 0.1) { 2545 array[i] += array[old_index]; 2546 old_index = i; 2547 } 2548 } 2549 } 2550 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2551 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2552 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2553 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2554 /* get global ordering of local dofs */ 2555 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2556 if (local_dofs_mult) { 2557 for (i=0;i<n_local_dofs;i++) { 2558 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2559 } 2560 } else { 2561 for (i=0;i<n_local_dofs;i++) { 2562 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2563 } 2564 } 2565 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2566 /* free workspace */ 2567 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2568 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2569 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2570 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2571 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2572 /* return pointer to global ordering of local dofs */ 2573 *global_numbering_subset = temp_global_dofs; 2574 PetscFunctionReturn(0); 2575 } 2576 2577 #undef __FUNCT__ 2578 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2579 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2580 { 2581 PetscInt i,j; 2582 PetscScalar *alphas; 2583 PetscErrorCode ierr; 2584 2585 PetscFunctionBegin; 2586 /* this implements stabilized Gram-Schmidt */ 2587 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2588 for (i=0;i<n;i++) { 2589 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2590 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2591 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2592 } 2593 ierr = PetscFree(alphas);CHKERRQ(ierr); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 /* TODO 2598 - now preallocation is done assuming SEQDENSE local matrices 2599 */ 2600 #undef __FUNCT__ 2601 #define __FUNCT__ "MatISGetMPIXAIJ" 2602 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2603 { 2604 Mat new_mat; 2605 Mat_IS *matis = (Mat_IS*)(mat->data); 2606 /* info on mat */ 2607 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2608 PetscInt bs,rows,cols; 2609 PetscInt lrows,lcols; 2610 PetscInt local_rows,local_cols; 2611 PetscBool isdense; 2612 /* values insertion */ 2613 PetscScalar *array; 2614 PetscInt *local_indices,*global_indices; 2615 /* work */ 2616 PetscInt i,j,index_row; 2617 PetscErrorCode ierr; 2618 2619 PetscFunctionBegin; 2620 /* MISSING CHECKS 2621 - rectangular case not covered (it is not allowed by MATIS) 2622 */ 2623 /* get info from mat */ 2624 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2625 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2626 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2627 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2628 2629 /* work */ 2630 ierr = PetscMalloc1(local_rows,&local_indices);CHKERRQ(ierr); 2631 for (i=0;i<local_rows;i++) local_indices[i]=i; 2632 /* map indices of local mat to global values */ 2633 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2634 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2635 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2636 2637 if (reuse==MAT_INITIAL_MATRIX) { 2638 Vec vec_dnz,vec_onz; 2639 PetscScalar *my_dnz,*my_onz; 2640 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2641 PetscInt index_col,owner; 2642 PetscMPIInt nsubdomains; 2643 2644 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2645 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2646 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2647 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2648 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2649 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2650 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2651 2652 /* 2653 preallocation 2654 */ 2655 2656 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2657 /* 2658 Some vectors are needed to sum up properly on shared interface dofs. 2659 Preallocation macros cannot do the job. 2660 Note that preallocation is not exact, since it overestimates nonzeros 2661 */ 2662 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2663 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2664 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2665 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2666 /* All processes need to compute entire row ownership */ 2667 ierr = PetscMalloc1(rows,&row_ownership);CHKERRQ(ierr); 2668 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2669 for (i=0;i<nsubdomains;i++) { 2670 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2671 row_ownership[j]=i; 2672 } 2673 } 2674 2675 /* 2676 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2677 then, they will be summed up properly. This way, preallocation is always sufficient 2678 */ 2679 ierr = PetscMalloc1(local_rows,&my_dnz);CHKERRQ(ierr); 2680 ierr = PetscMalloc1(local_rows,&my_onz);CHKERRQ(ierr); 2681 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2682 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2683 for (i=0;i<local_rows;i++) { 2684 index_row = global_indices[i]; 2685 for (j=i;j<local_rows;j++) { 2686 owner = row_ownership[index_row]; 2687 index_col = global_indices[j]; 2688 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2689 my_dnz[i] += 1.0; 2690 } else { /* offdiag block */ 2691 my_onz[i] += 1.0; 2692 } 2693 /* same as before, interchanging rows and cols */ 2694 if (i != j) { 2695 owner = row_ownership[index_col]; 2696 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2697 my_dnz[j] += 1.0; 2698 } else { 2699 my_onz[j] += 1.0; 2700 } 2701 } 2702 } 2703 } 2704 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2705 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2706 if (local_rows) { /* multilevel guard */ 2707 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2708 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2709 } 2710 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2711 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2712 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2713 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2714 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2715 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2716 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2717 2718 /* set computed preallocation in dnz and onz */ 2719 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2720 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2721 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2722 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2723 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2724 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2725 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2726 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2727 2728 /* Resize preallocation if overestimated */ 2729 for (i=0;i<lrows;i++) { 2730 dnz[i] = PetscMin(dnz[i],lcols); 2731 onz[i] = PetscMin(onz[i],cols-lcols); 2732 } 2733 /* set preallocation */ 2734 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2735 for (i=0;i<lrows/bs;i++) { 2736 dnz[i] = dnz[i*bs]/bs; 2737 onz[i] = onz[i*bs]/bs; 2738 } 2739 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2740 for (i=0;i<lrows/bs;i++) { 2741 dnz[i] = dnz[i]-i; 2742 } 2743 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2744 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2745 *M = new_mat; 2746 } else { 2747 PetscInt mbs,mrows,mcols; 2748 /* some checks */ 2749 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2750 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2751 if (mrows != rows) { 2752 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2753 } 2754 if (mrows != rows) { 2755 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2756 } 2757 if (mbs != bs) { 2758 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2759 } 2760 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2761 } 2762 /* set local to global mappings */ 2763 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2764 /* Set values */ 2765 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2766 if (isdense) { /* special case for dense local matrices */ 2767 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2768 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2769 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2770 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2771 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2772 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2773 } else { /* very basic values insertion for all other matrix types */ 2774 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2775 for (i=0;i<local_rows;i++) { 2776 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2777 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2778 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2779 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2780 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2781 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2782 } 2783 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2784 } 2785 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2786 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2787 if (isdense) { 2788 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2789 } 2790 PetscFunctionReturn(0); 2791 } 2792 2793 #undef __FUNCT__ 2794 #define __FUNCT__ "MatISGetSubassemblingPattern" 2795 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2796 { 2797 Mat subdomain_adj; 2798 IS new_ranks,ranks_send_to; 2799 MatPartitioning partitioner; 2800 Mat_IS *matis; 2801 PetscInt n_neighs,*neighs,*n_shared,**shared; 2802 PetscInt prank; 2803 PetscMPIInt size,rank,color; 2804 PetscInt *xadj,*adjncy,*oldranks; 2805 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2806 PetscInt i,j,local_size,threshold=0; 2807 PetscErrorCode ierr; 2808 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2809 PetscSubcomm subcomm; 2810 2811 PetscFunctionBegin; 2812 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2813 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2814 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2815 2816 /* Get info on mapping */ 2817 matis = (Mat_IS*)(mat->data); 2818 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2819 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2820 2821 /* build local CSR graph of subdomains' connectivity */ 2822 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2823 xadj[0] = 0; 2824 xadj[1] = PetscMax(n_neighs-1,0); 2825 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2826 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2827 2828 if (threshold) { 2829 PetscInt* count,min_threshold; 2830 ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr); 2831 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2832 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2833 for (j=0;j<n_shared[i];j++) { 2834 count[shared[i][j]] += 1; 2835 } 2836 } 2837 /* adapt threshold since we dont want to lose significant connections */ 2838 min_threshold = n_neighs; 2839 for (i=1;i<n_neighs;i++) { 2840 for (j=0;j<n_shared[i];j++) { 2841 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2842 } 2843 } 2844 threshold = PetscMax(min_threshold+1,threshold); 2845 xadj[1] = 0; 2846 for (i=1;i<n_neighs;i++) { 2847 for (j=0;j<n_shared[i];j++) { 2848 if (count[shared[i][j]] < threshold) { 2849 adjncy[xadj[1]] = neighs[i]; 2850 adjncy_wgt[xadj[1]] = n_shared[i]; 2851 xadj[1]++; 2852 break; 2853 } 2854 } 2855 } 2856 ierr = PetscFree(count);CHKERRQ(ierr); 2857 } else { 2858 if (xadj[1]) { 2859 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2860 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2861 } 2862 } 2863 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2864 if (use_square) { 2865 for (i=0;i<xadj[1];i++) { 2866 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2867 } 2868 } 2869 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2870 2871 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2872 2873 /* 2874 Restrict work on active processes only. 2875 */ 2876 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2877 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2878 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2879 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2880 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2881 if (color) { 2882 ierr = PetscFree(xadj);CHKERRQ(ierr); 2883 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2884 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2885 } else { 2886 PetscInt coarsening_ratio; 2887 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2888 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2889 prank = rank; 2890 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2891 /* 2892 for (i=0;i<size;i++) { 2893 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2894 } 2895 */ 2896 for (i=0;i<xadj[1];i++) { 2897 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2898 } 2899 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2900 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2901 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2902 2903 /* Partition */ 2904 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2905 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2906 if (use_vwgt) { 2907 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2908 v_wgt[0] = local_size; 2909 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2910 } 2911 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2912 coarsening_ratio = size/n_subdomains; 2913 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2914 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2915 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2916 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2917 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2918 2919 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2920 if (contiguous) { 2921 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2922 } else { 2923 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2924 } 2925 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2926 /* clean up */ 2927 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2928 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2929 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2930 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2931 } 2932 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2933 2934 /* assemble parallel IS for sends */ 2935 i = 1; 2936 if (color) i=0; 2937 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2938 2939 /* get back IS */ 2940 *is_sends = ranks_send_to; 2941 PetscFunctionReturn(0); 2942 } 2943 2944 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2945 2946 #undef __FUNCT__ 2947 #define __FUNCT__ "MatISSubassemble" 2948 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2949 { 2950 Mat local_mat; 2951 Mat_IS *matis; 2952 IS is_sends_internal; 2953 PetscInt rows,cols; 2954 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2955 PetscBool ismatis,isdense,destroy_mat; 2956 ISLocalToGlobalMapping l2gmap; 2957 PetscInt* l2gmap_indices; 2958 const PetscInt* is_indices; 2959 MatType new_local_type; 2960 /* buffers */ 2961 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2962 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2963 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2964 /* MPI */ 2965 MPI_Comm comm,comm_n; 2966 PetscSubcomm subcomm; 2967 PetscMPIInt n_sends,n_recvs,commsize; 2968 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2969 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2970 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2971 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2972 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2973 PetscErrorCode ierr; 2974 2975 PetscFunctionBegin; 2976 /* TODO: add missing checks */ 2977 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2978 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2979 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2980 PetscValidLogicalCollectiveInt(mat,nis,7); 2981 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2982 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2983 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2984 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2985 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2986 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2987 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2988 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2989 PetscInt mrows,mcols,mnrows,mncols; 2990 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2991 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2992 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2993 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2994 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2995 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2996 } 2997 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2998 PetscValidLogicalCollectiveInt(mat,bs,0); 2999 /* prepare IS for sending if not provided */ 3000 if (!is_sends) { 3001 PetscBool pcontig = PETSC_TRUE; 3002 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3003 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3004 } else { 3005 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3006 is_sends_internal = is_sends; 3007 } 3008 3009 /* get pointer of MATIS data */ 3010 matis = (Mat_IS*)mat->data; 3011 3012 /* get comm */ 3013 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3014 3015 /* compute number of sends */ 3016 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3017 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3018 3019 /* compute number of receives */ 3020 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3021 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3022 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3023 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3024 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3025 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3026 ierr = PetscFree(iflags);CHKERRQ(ierr); 3027 3028 /* restrict comm if requested */ 3029 subcomm = 0; 3030 destroy_mat = PETSC_FALSE; 3031 if (restrict_comm) { 3032 PetscMPIInt color,rank,subcommsize; 3033 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3034 color = 0; 3035 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3036 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3037 subcommsize = commsize - subcommsize; 3038 /* check if reuse has been requested */ 3039 if (reuse == MAT_REUSE_MATRIX) { 3040 if (*mat_n) { 3041 PetscMPIInt subcommsize2; 3042 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3043 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3044 comm_n = PetscObjectComm((PetscObject)*mat_n); 3045 } else { 3046 comm_n = PETSC_COMM_SELF; 3047 } 3048 } else { /* MAT_INITIAL_MATRIX */ 3049 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3050 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3051 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3052 comm_n = subcomm->comm; 3053 } 3054 /* flag to destroy *mat_n if not significative */ 3055 if (color) destroy_mat = PETSC_TRUE; 3056 } else { 3057 comm_n = comm; 3058 } 3059 3060 /* prepare send/receive buffers */ 3061 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3062 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3063 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3064 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3065 if (nis) { 3066 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 3067 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 3068 } 3069 3070 /* Get data from local matrices */ 3071 if (!isdense) { 3072 /* TODO: See below some guidelines on how to prepare the local buffers */ 3073 /* 3074 send_buffer_vals should contain the raw values of the local matrix 3075 send_buffer_idxs should contain: 3076 - MatType_PRIVATE type 3077 - PetscInt size_of_l2gmap 3078 - PetscInt global_row_indices[size_of_l2gmap] 3079 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3080 */ 3081 } else { 3082 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3083 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3084 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 3085 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3086 send_buffer_idxs[1] = i; 3087 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3088 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3089 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3090 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3091 for (i=0;i<n_sends;i++) { 3092 ilengths_vals[is_indices[i]] = len*len; 3093 ilengths_idxs[is_indices[i]] = len+2; 3094 } 3095 } 3096 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3097 /* additional is (if any) */ 3098 if (nis) { 3099 PetscMPIInt psum; 3100 PetscInt j; 3101 for (j=0,psum=0;j<nis;j++) { 3102 PetscInt plen; 3103 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3104 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3105 psum += len+1; /* indices + lenght */ 3106 } 3107 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 3108 for (j=0,psum=0;j<nis;j++) { 3109 PetscInt plen; 3110 const PetscInt *is_array_idxs; 3111 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3112 send_buffer_idxs_is[psum] = plen; 3113 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3114 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3115 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3116 psum += plen+1; /* indices + lenght */ 3117 } 3118 for (i=0;i<n_sends;i++) { 3119 ilengths_idxs_is[is_indices[i]] = psum; 3120 } 3121 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3122 } 3123 3124 buf_size_idxs = 0; 3125 buf_size_vals = 0; 3126 buf_size_idxs_is = 0; 3127 for (i=0;i<n_recvs;i++) { 3128 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3129 buf_size_vals += (PetscInt)olengths_vals[i]; 3130 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3131 } 3132 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3133 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3134 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3135 3136 /* get new tags for clean communications */ 3137 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3138 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3139 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3140 3141 /* allocate for requests */ 3142 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3143 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3144 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3145 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3146 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3147 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3148 3149 /* communications */ 3150 ptr_idxs = recv_buffer_idxs; 3151 ptr_vals = recv_buffer_vals; 3152 ptr_idxs_is = recv_buffer_idxs_is; 3153 for (i=0;i<n_recvs;i++) { 3154 source_dest = onodes[i]; 3155 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3156 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3157 ptr_idxs += olengths_idxs[i]; 3158 ptr_vals += olengths_vals[i]; 3159 if (nis) { 3160 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3161 ptr_idxs_is += olengths_idxs_is[i]; 3162 } 3163 } 3164 for (i=0;i<n_sends;i++) { 3165 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3166 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3167 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3168 if (nis) { 3169 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3170 } 3171 } 3172 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3173 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3174 3175 /* assemble new l2g map */ 3176 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3177 ptr_idxs = recv_buffer_idxs; 3178 buf_size_idxs = 0; 3179 for (i=0;i<n_recvs;i++) { 3180 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3181 ptr_idxs += olengths_idxs[i]; 3182 } 3183 ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr); 3184 ptr_idxs = recv_buffer_idxs; 3185 buf_size_idxs = 0; 3186 for (i=0;i<n_recvs;i++) { 3187 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3188 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3189 ptr_idxs += olengths_idxs[i]; 3190 } 3191 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3192 ierr = ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3193 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3194 3195 /* infer new local matrix type from received local matrices type */ 3196 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3197 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3198 if (n_recvs) { 3199 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3200 ptr_idxs = recv_buffer_idxs; 3201 for (i=0;i<n_recvs;i++) { 3202 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3203 new_local_type_private = MATAIJ_PRIVATE; 3204 break; 3205 } 3206 ptr_idxs += olengths_idxs[i]; 3207 } 3208 switch (new_local_type_private) { 3209 case MATDENSE_PRIVATE: 3210 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3211 new_local_type = MATSEQAIJ; 3212 bs = 1; 3213 } else { /* if I receive only 1 dense matrix */ 3214 new_local_type = MATSEQDENSE; 3215 bs = 1; 3216 } 3217 break; 3218 case MATAIJ_PRIVATE: 3219 new_local_type = MATSEQAIJ; 3220 bs = 1; 3221 break; 3222 case MATBAIJ_PRIVATE: 3223 new_local_type = MATSEQBAIJ; 3224 break; 3225 case MATSBAIJ_PRIVATE: 3226 new_local_type = MATSEQSBAIJ; 3227 break; 3228 default: 3229 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3230 break; 3231 } 3232 } else { /* by default, new_local_type is seqdense */ 3233 new_local_type = MATSEQDENSE; 3234 bs = 1; 3235 } 3236 3237 /* create MATIS object if needed */ 3238 if (reuse == MAT_INITIAL_MATRIX) { 3239 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3240 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3241 } else { 3242 /* it also destroys the local matrices */ 3243 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3244 } 3245 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3246 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3247 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3248 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3249 3250 /* set values */ 3251 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3252 ptr_vals = recv_buffer_vals; 3253 ptr_idxs = recv_buffer_idxs; 3254 for (i=0;i<n_recvs;i++) { 3255 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3256 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3257 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3258 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3259 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3260 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3261 } else { 3262 /* TODO */ 3263 } 3264 ptr_idxs += olengths_idxs[i]; 3265 ptr_vals += olengths_vals[i]; 3266 } 3267 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3268 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3269 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3270 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3271 3272 #if 0 3273 if (!restrict_comm) { /* check */ 3274 Vec lvec,rvec; 3275 PetscReal infty_error; 3276 3277 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3278 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3279 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3280 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3281 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3282 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3283 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3284 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3285 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3286 } 3287 #endif 3288 3289 /* assemble new additional is (if any) */ 3290 if (nis) { 3291 PetscInt **temp_idxs,*count_is,j,psum; 3292 3293 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3294 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3295 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3296 ptr_idxs = recv_buffer_idxs_is; 3297 psum = 0; 3298 for (i=0;i<n_recvs;i++) { 3299 for (j=0;j<nis;j++) { 3300 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3301 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3302 psum += plen; 3303 ptr_idxs += plen+1; /* shift pointer to received data */ 3304 } 3305 } 3306 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3307 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3308 for (i=1;i<nis;i++) { 3309 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3310 } 3311 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3312 ptr_idxs = recv_buffer_idxs_is; 3313 for (i=0;i<n_recvs;i++) { 3314 for (j=0;j<nis;j++) { 3315 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3316 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3317 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3318 ptr_idxs += plen+1; /* shift pointer to received data */ 3319 } 3320 } 3321 for (i=0;i<nis;i++) { 3322 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3323 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3324 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3325 } 3326 ierr = PetscFree(count_is);CHKERRQ(ierr); 3327 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3328 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3329 } 3330 /* free workspace */ 3331 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3332 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3333 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3334 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3335 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3336 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3337 if (isdense) { 3338 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3339 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3340 } else { 3341 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3342 } 3343 if (nis) { 3344 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3345 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3346 } 3347 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3348 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3349 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3350 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3351 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3352 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3353 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3354 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3355 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3356 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3357 ierr = PetscFree(onodes);CHKERRQ(ierr); 3358 if (nis) { 3359 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3360 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3361 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3362 } 3363 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3364 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3365 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3366 for (i=0;i<nis;i++) { 3367 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3368 } 3369 } 3370 PetscFunctionReturn(0); 3371 } 3372 3373 /* temporary hack into ksp private data structure */ 3374 #include <petsc-private/kspimpl.h> 3375 3376 #undef __FUNCT__ 3377 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3378 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3379 { 3380 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3381 PC_IS *pcis = (PC_IS*)pc->data; 3382 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3383 MatNullSpace CoarseNullSpace=NULL; 3384 ISLocalToGlobalMapping coarse_islg; 3385 IS coarse_is,*isarray; 3386 PetscInt i,im_active=-1,active_procs=-1; 3387 PetscInt nis,nisdofs,nisneu; 3388 PC pc_temp; 3389 PCType coarse_pc_type; 3390 KSPType coarse_ksp_type; 3391 PetscBool multilevel_requested,multilevel_allowed; 3392 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3393 Mat t_coarse_mat_is; 3394 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3395 PetscMPIInt all_procs; 3396 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3397 PetscBool compute_vecs = PETSC_FALSE; 3398 PetscErrorCode ierr; 3399 3400 PetscFunctionBegin; 3401 /* Assign global numbering to coarse dofs */ 3402 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3403 compute_vecs = PETSC_TRUE; 3404 PetscInt ocoarse_size; 3405 ocoarse_size = pcbddc->coarse_size; 3406 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3407 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3408 /* see if we can avoid some work */ 3409 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3410 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3411 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3412 coarse_reuse = PETSC_FALSE; 3413 } else { /* we can safely reuse already computed coarse matrix */ 3414 coarse_reuse = PETSC_TRUE; 3415 } 3416 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3417 coarse_reuse = PETSC_FALSE; 3418 } 3419 /* reset any subassembling information */ 3420 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3421 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3422 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3423 coarse_reuse = PETSC_TRUE; 3424 } 3425 3426 /* count "active" (i.e. with positive local size) and "void" processes */ 3427 im_active = !!(pcis->n); 3428 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3429 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3430 void_procs = all_procs-active_procs; 3431 csin_type_simple = PETSC_TRUE; 3432 if (pcbddc->current_level) { 3433 csin_ml = PETSC_TRUE; 3434 ncoarse_ml = void_procs; 3435 csin_ds = PETSC_TRUE; 3436 ncoarse_ds = void_procs; 3437 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3438 } else { 3439 csin_ml = PETSC_FALSE; 3440 ncoarse_ml = all_procs; 3441 if (void_procs) { 3442 csin_ds = PETSC_TRUE; 3443 ncoarse_ds = void_procs; 3444 csin_type_simple = PETSC_FALSE; 3445 } else { 3446 csin_ds = PETSC_FALSE; 3447 ncoarse_ds = all_procs; 3448 } 3449 } 3450 3451 /* 3452 test if we can go multilevel: three conditions must be satisfied: 3453 - we have not exceeded the number of levels requested 3454 - we can actually subassemble the active processes 3455 - we can find a suitable number of MPI processes where we can place the subassembled problem 3456 */ 3457 multilevel_allowed = PETSC_FALSE; 3458 multilevel_requested = PETSC_FALSE; 3459 if (pcbddc->current_level < pcbddc->max_levels) { 3460 multilevel_requested = PETSC_TRUE; 3461 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3462 multilevel_allowed = PETSC_FALSE; 3463 } else { 3464 multilevel_allowed = PETSC_TRUE; 3465 } 3466 } 3467 /* determine number of process partecipating to coarse solver */ 3468 if (multilevel_allowed) { 3469 ncoarse = ncoarse_ml; 3470 csin = csin_ml; 3471 } else { 3472 ncoarse = ncoarse_ds; 3473 csin = csin_ds; 3474 } 3475 3476 /* creates temporary l2gmap and IS for coarse indexes */ 3477 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3478 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3479 3480 /* creates temporary MATIS object for coarse matrix */ 3481 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3482 #if 0 3483 { 3484 PetscViewer viewer; 3485 char filename[256]; 3486 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3487 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3488 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3489 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3490 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3491 } 3492 #endif 3493 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3494 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3495 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3496 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3497 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3498 3499 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3500 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3501 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3502 const PetscInt *idxs; 3503 ISLocalToGlobalMapping tmap; 3504 3505 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3506 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3507 /* allocate space for temporary storage */ 3508 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3509 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3510 /* allocate for IS array */ 3511 nisdofs = pcbddc->n_ISForDofsLocal; 3512 nisneu = !!pcbddc->NeumannBoundariesLocal; 3513 nis = nisdofs + nisneu; 3514 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3515 /* dofs splitting */ 3516 for (i=0;i<nisdofs;i++) { 3517 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3518 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3519 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3520 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3521 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3522 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3523 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3524 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3525 } 3526 /* neumann boundaries */ 3527 if (pcbddc->NeumannBoundariesLocal) { 3528 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3529 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3530 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3531 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3532 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3533 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3534 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3535 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3536 } 3537 /* free memory */ 3538 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3539 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3540 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3541 } else { 3542 nis = 0; 3543 nisdofs = 0; 3544 nisneu = 0; 3545 isarray = NULL; 3546 } 3547 /* destroy no longer needed map */ 3548 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3549 3550 /* restrict on coarse candidates (if needed) */ 3551 coarse_mat_is = NULL; 3552 if (csin) { 3553 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3554 PetscInt j,tissize,*nisindices; 3555 PetscInt *coarse_candidates; 3556 const PetscInt* tisindices; 3557 /* get coarse candidates' ranks in pc communicator */ 3558 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3559 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3560 for (i=0,j=0;i<all_procs;i++) { 3561 if (!coarse_candidates[i]) { 3562 coarse_candidates[j]=i; 3563 j++; 3564 } 3565 } 3566 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3567 /* get a suitable subassembling pattern */ 3568 if (csin_type_simple) { 3569 PetscMPIInt rank; 3570 PetscInt issize,isidx; 3571 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3572 if (im_active) { 3573 issize = 1; 3574 isidx = (PetscInt)rank; 3575 } else { 3576 issize = 0; 3577 isidx = -1; 3578 } 3579 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3580 } else { 3581 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3582 } 3583 if (pcbddc->dbg_flag) { 3584 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3585 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3586 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3587 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3588 for (i=0;i<j;i++) { 3589 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3590 } 3591 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3592 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3593 } 3594 /* shift the pattern on coarse candidates */ 3595 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3596 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3597 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3598 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3599 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3600 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3601 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3602 } 3603 if (pcbddc->dbg_flag) { 3604 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3605 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3606 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3607 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3608 } 3609 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3610 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3611 } else { 3612 if (pcbddc->dbg_flag) { 3613 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3614 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3615 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3616 } 3617 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3618 coarse_mat_is = t_coarse_mat_is; 3619 } 3620 3621 /* create local to global scatters for coarse problem */ 3622 if (compute_vecs) { 3623 PetscInt lrows; 3624 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3625 if (coarse_mat_is) { 3626 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3627 } else { 3628 lrows = 0; 3629 } 3630 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3631 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3632 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3633 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3634 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3635 } 3636 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3637 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3638 3639 /* set defaults for coarse KSP and PC */ 3640 if (multilevel_allowed) { 3641 coarse_ksp_type = KSPRICHARDSON; 3642 coarse_pc_type = PCBDDC; 3643 } else { 3644 coarse_ksp_type = KSPPREONLY; 3645 coarse_pc_type = PCREDUNDANT; 3646 } 3647 3648 /* print some info if requested */ 3649 if (pcbddc->dbg_flag) { 3650 if (!multilevel_allowed) { 3651 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3652 if (multilevel_requested) { 3653 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3654 } else if (pcbddc->max_levels) { 3655 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3656 } 3657 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3658 } 3659 } 3660 3661 /* create the coarse KSP object only once with defaults */ 3662 if (coarse_mat_is) { 3663 MatReuse coarse_mat_reuse; 3664 PetscViewer dbg_viewer = NULL; 3665 if (pcbddc->dbg_flag) { 3666 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3667 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3668 } 3669 if (!pcbddc->coarse_ksp) { 3670 char prefix[256],str_level[16]; 3671 size_t len; 3672 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3673 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3674 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3675 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3676 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3677 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3678 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3679 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3680 /* prefix */ 3681 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3682 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3683 if (!pcbddc->current_level) { 3684 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3685 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3686 } else { 3687 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3688 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3689 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3690 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3691 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3692 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3693 } 3694 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3695 /* allow user customization */ 3696 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3697 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3698 } 3699 3700 /* get some info after set from options */ 3701 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3702 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3703 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3704 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3705 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3706 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3707 isbddc = PETSC_FALSE; 3708 } 3709 if (isredundant) { 3710 KSP inner_ksp; 3711 PC inner_pc; 3712 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3713 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3714 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3715 } 3716 3717 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3718 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3719 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3720 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3721 if (nisdofs) { 3722 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3723 for (i=0;i<nisdofs;i++) { 3724 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3725 } 3726 } 3727 if (nisneu) { 3728 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3729 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3730 } 3731 3732 /* assemble coarse matrix */ 3733 if (coarse_reuse) { 3734 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3735 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3736 coarse_mat_reuse = MAT_REUSE_MATRIX; 3737 } else { 3738 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3739 } 3740 if (isbddc || isnn) { 3741 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3742 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3743 if (pcbddc->dbg_flag) { 3744 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3745 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3746 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3747 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3748 } 3749 } 3750 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3751 } else { 3752 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3753 } 3754 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3755 3756 /* propagate symmetry info to coarse matrix */ 3757 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3758 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3759 3760 /* set operators */ 3761 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3762 if (pcbddc->dbg_flag) { 3763 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3764 } 3765 } else { /* processes non partecipating to coarse solver (if any) */ 3766 coarse_mat = 0; 3767 } 3768 ierr = PetscFree(isarray);CHKERRQ(ierr); 3769 #if 0 3770 { 3771 PetscViewer viewer; 3772 char filename[256]; 3773 sprintf(filename,"coarse_mat.m"); 3774 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3775 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3776 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3777 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3778 } 3779 #endif 3780 3781 /* Compute coarse null space (special handling by BDDC only) */ 3782 if (pcbddc->NullSpace) { 3783 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3784 } 3785 3786 if (pcbddc->coarse_ksp) { 3787 Vec crhs,csol; 3788 PetscBool ispreonly; 3789 if (CoarseNullSpace) { 3790 if (isbddc) { 3791 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3792 } else { 3793 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3794 } 3795 } 3796 /* setup coarse ksp */ 3797 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3798 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3799 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3800 /* hack */ 3801 if (!csol) { 3802 ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3803 } 3804 if (!crhs) { 3805 ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3806 } 3807 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3808 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3809 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3810 KSP check_ksp; 3811 KSPType check_ksp_type; 3812 PC check_pc; 3813 Vec check_vec,coarse_vec; 3814 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3815 PetscInt its; 3816 PetscBool compute_eigs; 3817 PetscReal *eigs_r,*eigs_c; 3818 PetscInt neigs; 3819 const char *prefix; 3820 3821 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3822 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3823 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3824 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3825 if (ispreonly) { 3826 check_ksp_type = KSPPREONLY; 3827 compute_eigs = PETSC_FALSE; 3828 } else { 3829 check_ksp_type = KSPGMRES; 3830 compute_eigs = PETSC_TRUE; 3831 } 3832 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3833 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3834 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3835 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3836 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3837 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3838 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3839 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3840 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3841 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3842 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3843 /* create random vec */ 3844 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3845 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3846 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3847 if (CoarseNullSpace) { 3848 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3849 } 3850 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3851 /* solve coarse problem */ 3852 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3853 if (CoarseNullSpace) { 3854 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3855 } 3856 /* set eigenvalue estimation if preonly has not been requested */ 3857 if (compute_eigs) { 3858 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3859 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3860 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3861 lambda_max = eigs_r[neigs-1]; 3862 lambda_min = eigs_r[0]; 3863 if (pcbddc->use_coarse_estimates) { 3864 if (lambda_max>lambda_min) { 3865 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3866 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3867 } 3868 } 3869 } 3870 3871 /* check coarse problem residual error */ 3872 if (pcbddc->dbg_flag) { 3873 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3874 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3875 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3876 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3877 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3878 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3879 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3880 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3881 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3882 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3883 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3884 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3885 if (compute_eigs) { 3886 PetscReal lambda_max_s,lambda_min_s; 3887 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3888 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3889 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3890 for (i=0;i<neigs;i++) { 3891 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3892 } 3893 } 3894 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3895 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3896 } 3897 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3898 if (compute_eigs) { 3899 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3900 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3901 } 3902 } 3903 } 3904 /* print additional info */ 3905 if (pcbddc->dbg_flag) { 3906 /* waits until all processes reaches this point */ 3907 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3908 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3909 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3910 } 3911 3912 /* free memory */ 3913 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3914 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3915 PetscFunctionReturn(0); 3916 } 3917 3918 #undef __FUNCT__ 3919 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3920 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3921 { 3922 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3923 PC_IS* pcis = (PC_IS*)pc->data; 3924 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3925 PetscInt i,coarse_size; 3926 PetscInt *local_primal_indices; 3927 PetscErrorCode ierr; 3928 3929 PetscFunctionBegin; 3930 /* Compute global number of coarse dofs */ 3931 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3932 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3933 } 3934 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3935 3936 /* check numbering */ 3937 if (pcbddc->dbg_flag) { 3938 PetscScalar coarsesum,*array; 3939 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 3940 3941 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3942 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3943 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3944 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3945 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3946 for (i=0;i<pcbddc->local_primal_size;i++) { 3947 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3948 } 3949 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3950 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3951 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3952 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3953 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3954 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3955 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3956 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3957 for (i=0;i<pcis->n;i++) { 3958 if (array[i] == 1.0) { 3959 set_error = PETSC_TRUE; 3960 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3961 } 3962 } 3963 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3964 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3965 for (i=0;i<pcis->n;i++) { 3966 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3967 } 3968 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3969 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3970 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3971 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3972 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3973 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3974 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 3975 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3976 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3977 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3978 for (i=0;i<pcbddc->local_primal_size;i++) { 3979 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3980 } 3981 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3982 } 3983 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3984 if (set_error_reduced) { 3985 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 3986 } 3987 } 3988 /* get back data */ 3989 *coarse_size_n = coarse_size; 3990 *local_primal_indices_n = local_primal_indices; 3991 PetscFunctionReturn(0); 3992 } 3993 3994 #undef __FUNCT__ 3995 #define __FUNCT__ "PCBDDCGlobalToLocal" 3996 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 3997 { 3998 IS localis_t; 3999 PetscInt i,lsize,*idxs,n; 4000 PetscScalar *vals; 4001 PetscErrorCode ierr; 4002 4003 PetscFunctionBegin; 4004 /* get indices in local ordering exploiting local to global map */ 4005 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4006 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 4007 for (i=0;i<lsize;i++) vals[i] = 1.0; 4008 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4009 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4010 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4011 if (idxs) { /* multilevel guard */ 4012 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4013 } 4014 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4015 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4016 ierr = PetscFree(vals);CHKERRQ(ierr); 4017 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4018 /* now compute set in local ordering */ 4019 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4020 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4021 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4022 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4023 for (i=0,lsize=0;i<n;i++) { 4024 if (PetscRealPart(vals[i]) > 0.5) { 4025 lsize++; 4026 } 4027 } 4028 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 4029 for (i=0,lsize=0;i<n;i++) { 4030 if (PetscRealPart(vals[i]) > 0.5) { 4031 idxs[lsize++] = i; 4032 } 4033 } 4034 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4035 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4036 *localis = localis_t; 4037 PetscFunctionReturn(0); 4038 } 4039