1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 4 /* prototypes for deluxe functions */ 5 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC); 6 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC); 7 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC); 8 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Par(PC,PetscInt,PetscInt,PetscInt[],PetscInt[]); 9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq(PC,PetscInt,PetscInt,PetscInt[],PetscInt[]); 10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq_New(PC); 11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling); 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "PCBDDCScalingExtension_Basic" 15 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) 16 { 17 PC_IS* pcis = (PC_IS*)pc->data; 18 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 /* Apply partition of unity */ 23 ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr); 24 ierr = VecSet(global_vector,0.0);CHKERRQ(ierr); 25 ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 26 ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 27 PetscFunctionReturn(0); 28 } 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "PCBDDCScalingExtension_Deluxe" 32 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) 33 { 34 PC_IS* pcis=(PC_IS*)pc->data; 35 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 36 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 37 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs[1]; 38 PetscInt i; 39 PetscErrorCode ierr; 40 41 /* TODO CHECK STUFF RELATED WITH FAKE WORK */ 42 PetscFunctionBegin; 43 ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr); /* needed by the fake work below */ 44 if (deluxe_ctx->n_simple) { 45 /* scale deluxe vertices using diagonal scaling */ 46 PetscScalar *array_x,*array_D,*array; 47 ierr = VecGetArray(x,&array_x);CHKERRQ(ierr); 48 ierr = VecGetArray(pcis->D,&array_D);CHKERRQ(ierr); 49 ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr); 50 for (i=0;i<deluxe_ctx->n_simple;i++) { 51 array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]]; 52 } 53 ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr); 54 ierr = VecRestoreArray(pcis->D,&array_D);CHKERRQ(ierr); 55 ierr = VecRestoreArray(x,&array_x);CHKERRQ(ierr); 56 } 57 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication and ksp solution */ 58 if (deluxe_ctx->seq_ksp) { 59 ierr = VecScatterBegin(deluxe_ctx->seq_scctx,x,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 60 ierr = VecScatterEnd(deluxe_ctx->seq_scctx,x,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 61 ierr = MatMult(sub_schurs->S_Ej_all,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2);CHKERRQ(ierr); 62 ierr = KSPSolve(deluxe_ctx->seq_ksp,deluxe_ctx->seq_work2,deluxe_ctx->seq_work1);CHKERRQ(ierr); 63 /* fake work due to final ADD VALUES and vertices scaling needed? TODO: check it */ 64 ierr = VecScatterBegin(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 65 ierr = VecScatterEnd(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 66 } 67 /* parallel part */ 68 for (i=0;i<deluxe_ctx->par_colors;i++) { 69 if (deluxe_ctx->par_ksp[i]) { 70 PetscMPIInt color_rank; 71 PetscInt subidx = deluxe_ctx->par_col2sub[i]; 72 /* restrict on subset */ 73 ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],x,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 74 ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],x,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 75 /* S_Ej */ 76 ierr = MatMult(sub_schurs->S_Ej[subidx],sub_schurs->work1[subidx],sub_schurs->work2[subidx]);CHKERRQ(ierr); 77 /* (\sum_j S_Ej)^-1 */ 78 ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr); 79 ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 80 ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81 ierr = KSPSolve(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr); 82 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)deluxe_ctx->par_ksp[i]),&color_rank);CHKERRQ(ierr); 83 /* get back solution on subset */ 84 ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 85 ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 86 if (!color_rank) { /* only the master process in coloured comm copies the computed values */ 87 ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],sub_schurs->work1[subidx],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 88 ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],sub_schurs->work1[subidx],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 89 } 90 } 91 } 92 /* put local boundary part in global vector */ 93 ierr = VecSet(y,0.0);CHKERRQ(ierr); 94 ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 95 ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCScalingExtension" 101 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) 102 { 103 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 104 PetscErrorCode ierr; 105 106 PetscFunctionBegin; 107 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 108 PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2); 109 PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3); 110 if (local_interface_vector == pcbddc->work_scaling) { 111 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n"); 112 } 113 ierr = PetscTryMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr); 114 PetscFunctionReturn(0); 115 } 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "PCBDDCScalingRestriction_Basic" 119 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) 120 { 121 PetscErrorCode ierr; 122 PC_IS* pcis = (PC_IS*)pc->data; 123 124 PetscFunctionBegin; 125 ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 126 ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 127 /* Apply partition of unity */ 128 ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "PCBDDCScalingRestriction_Deluxe" 134 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) 135 { 136 PC_IS* pcis=(PC_IS*)pc->data; 137 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 138 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 139 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs[1]; 140 PetscInt i; 141 PetscErrorCode ierr; 142 143 PetscFunctionBegin; 144 /* get local boundary part of global vector */ 145 ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 146 ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 147 if (deluxe_ctx->n_simple) { 148 /* scale deluxe vertices using diagonal scaling */ 149 PetscScalar *array_y,*array_D; 150 ierr = VecGetArray(y,&array_y);CHKERRQ(ierr); 151 ierr = VecGetArray(pcis->D,&array_D);CHKERRQ(ierr); 152 for (i=0;i<deluxe_ctx->n_simple;i++) { 153 array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]]; 154 } 155 ierr = VecRestoreArray(pcis->D,&array_D);CHKERRQ(ierr); 156 ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr); 157 } 158 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication and ksp solution */ 159 if (deluxe_ctx->seq_ksp) { 160 ierr = VecScatterBegin(deluxe_ctx->seq_scctx,y,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 161 ierr = VecScatterEnd(deluxe_ctx->seq_scctx,y,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 162 ierr = KSPSolveTranspose(deluxe_ctx->seq_ksp,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2);CHKERRQ(ierr); 163 ierr = MatMultTranspose(sub_schurs->S_Ej_all,deluxe_ctx->seq_work2,deluxe_ctx->seq_work1);CHKERRQ(ierr); 164 ierr = VecScatterBegin(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 165 ierr = VecScatterEnd(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 166 } 167 /* parallel part */ 168 for (i=0;i<deluxe_ctx->par_colors;i++) { 169 if (deluxe_ctx->par_ksp[i]) { 170 PetscInt subidx = deluxe_ctx->par_col2sub[i]; 171 /* restrict on subset */ 172 ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],y,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 173 ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],y,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 174 /* (\sum_j S_Ej)^-T */ 175 ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr); 176 ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 177 ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 178 ierr = KSPSolveTranspose(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr); 179 ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 180 ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 181 /* S_Ej^T */ 182 ierr = MatMultTranspose(sub_schurs->S_Ej[subidx],sub_schurs->work1[subidx],sub_schurs->work2[subidx]);CHKERRQ(ierr); 183 /* extend to boundary */ 184 ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],sub_schurs->work2[subidx],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 185 ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],sub_schurs->work2[subidx],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 186 } 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "PCBDDCScalingRestriction" 193 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) 194 { 195 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 196 PetscErrorCode ierr; 197 198 PetscFunctionBegin; 199 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 200 PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2); 201 PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3); 202 if (local_interface_vector == pcbddc->work_scaling) { 203 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Local vector should cannot be pcbddc->work_scaling!\n"); 204 } 205 ierr = PetscTryMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr); 206 PetscFunctionReturn(0); 207 } 208 209 #undef __FUNCT__ 210 #define __FUNCT__ "PCBDDCScalingSetUp" 211 PetscErrorCode PCBDDCScalingSetUp(PC pc) 212 { 213 PC_IS* pcis=(PC_IS*)pc->data; 214 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 219 /* create work vector for the operator */ 220 ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr); 221 ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr); 222 /* always rebuild pcis->D */ 223 if (pcis->use_stiffness_scaling) { 224 ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr); 225 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 226 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 227 } 228 ierr = VecCopy(pcis->D,pcis->vec1_B);CHKERRQ(ierr); 229 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 230 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 231 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 232 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 233 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 234 ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 235 /* now setup */ 236 if (pcbddc->use_deluxe_scaling) { 237 if (!pcbddc->deluxe_ctx) { 238 ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr); 239 } 240 ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr); 241 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr); 242 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr); 243 } else { 244 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr); 245 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr); 246 } 247 248 /* test */ 249 if (pcbddc->dbg_flag) { 250 Vec vec2_global; 251 PetscViewer viewer=pcbddc->dbg_viewer; 252 PetscReal error; 253 254 /* extension -> from local to parallel */ 255 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 256 ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr); 257 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 258 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 259 ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr); 260 ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr); 261 262 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 263 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 264 ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr); 265 ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr); 266 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr); 267 ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr); 268 if (error>1.e-8 && pcbddc->dbg_flag>1) { 269 ierr = VecView(pcis->vec1_global,viewer);CHKERRQ(ierr); 270 } 271 ierr = VecDestroy(&vec2_global);CHKERRQ(ierr); 272 273 /* restriction -> from parallel to local */ 274 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 275 ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr); 276 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 277 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 278 279 ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr); 280 ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr); 281 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 282 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 283 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr); 284 ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr); 285 if (error>1.e-8 && pcbddc->dbg_flag>1) { 286 ierr = VecView(pcis->vec1_global,viewer);CHKERRQ(ierr); 287 } 288 } 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "PCBDDCScalingDestroy" 294 PetscErrorCode PCBDDCScalingDestroy(PC pc) 295 { 296 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 297 PetscErrorCode ierr; 298 299 PetscFunctionBegin; 300 if (pcbddc->deluxe_ctx) { 301 ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr); 302 } 303 ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr); 304 /* remove functions */ 305 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr); 306 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "PCBDDCScalingCreate_Deluxe" 312 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) 313 { 314 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 315 PCBDDCDeluxeScaling deluxe_ctx; 316 PetscErrorCode ierr; 317 318 PetscFunctionBegin; 319 ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr); 320 pcbddc->deluxe_ctx = deluxe_ctx; 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "PCBDDCScalingDestroy_Deluxe" 326 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) 327 { 328 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 329 PetscErrorCode ierr; 330 331 PetscFunctionBegin; 332 ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr); 333 ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr); 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "PCBDDCScalingReset_Deluxe_Solvers" 339 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) 340 { 341 PetscErrorCode ierr; 342 343 PetscFunctionBegin; 344 ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 345 deluxe_ctx->n_simple = 0; 346 if (deluxe_ctx->seq_ksp) { 347 ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx);CHKERRQ(ierr); 348 ierr = VecDestroy(&deluxe_ctx->seq_work1);CHKERRQ(ierr); 349 ierr = VecDestroy(&deluxe_ctx->seq_work2);CHKERRQ(ierr); 350 ierr = KSPDestroy(&deluxe_ctx->seq_ksp);CHKERRQ(ierr); 351 } 352 if (deluxe_ctx->par_colors) { 353 PetscInt i; 354 for (i=0;i<deluxe_ctx->par_colors;i++) { 355 ierr = VecScatterDestroy(&deluxe_ctx->par_scctx_s[i]);CHKERRQ(ierr); 356 ierr = VecScatterDestroy(&deluxe_ctx->par_scctx_p[i]);CHKERRQ(ierr); 357 ierr = VecDestroy(&deluxe_ctx->par_vec[i]);CHKERRQ(ierr); 358 ierr = KSPDestroy(&deluxe_ctx->par_ksp[i]);CHKERRQ(ierr); 359 } 360 ierr = PetscFree5(deluxe_ctx->par_ksp, 361 deluxe_ctx->par_scctx_s, 362 deluxe_ctx->par_scctx_p, 363 deluxe_ctx->par_vec, 364 deluxe_ctx->par_col2sub);CHKERRQ(ierr); 365 } 366 deluxe_ctx->par_colors = 0; 367 PetscFunctionReturn(0); 368 } 369 370 #define OLD_CODE 0 371 #undef __FUNCT__ 372 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe" 373 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) 374 { 375 PC_IS *pcis=(PC_IS*)pc->data; 376 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 377 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 378 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs[1]; 379 PCBDDCGraph graph; 380 PetscBT bitmask; 381 #if OLD_CODE 382 IS *faces,*edges,*all_cc; 383 PetscInt *index_sequential,*index_parallel; 384 PetscInt *auxlocal_sequential,*auxlocal_parallel; 385 PetscInt *auxglobal_sequential,*auxglobal_parallel; 386 PetscInt *auxmapping,*idxs; 387 PetscInt i,max_subset_size; 388 PetscInt n_sequential_problems,n_local_sequential_problems,n_parallel_problems,n_local_parallel_problems; 389 PetscInt n_faces,n_edges,n_all_cc; 390 #else 391 PetscInt i; 392 const PetscInt* idxs; 393 Mat S_j; 394 PetscBool free_used_adj; 395 PetscInt *used_xadj,*used_adjncy; 396 #endif 397 PetscErrorCode ierr; 398 399 PetscFunctionBegin; 400 /* throw away the solvers */ 401 ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr); 402 403 /* attach interface graph for determining subsets */ 404 if (pcbddc->deluxe_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 405 PetscInt *idx_V_N; 406 IS verticesIS; 407 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&idx_V_N);CHKERRQ(ierr); 408 ierr = ISCreateGeneral(PETSC_COMM_SELF,i,idx_V_N,PETSC_OWN_POINTER,&verticesIS);CHKERRQ(ierr); 409 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 410 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 411 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 412 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 413 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 414 /* 415 if (pcbddc->dbg_flag) { 416 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 417 } 418 */ 419 } else { 420 graph = pcbddc->mat_graph; 421 } 422 423 #if OLD_CODE 424 /* get index sets for faces and edges */ 425 ierr = PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,NULL);CHKERRQ(ierr); 426 n_all_cc = n_faces+n_edges; 427 ierr = PetscMalloc1(n_all_cc,&all_cc);CHKERRQ(ierr); 428 for (i=0;i<n_faces;i++) { 429 all_cc[i] = faces[i]; 430 } 431 for (i=0;i<n_edges;i++) { 432 all_cc[n_faces+i] = edges[i]; 433 } 434 ierr = PetscFree(faces);CHKERRQ(ierr); 435 ierr = PetscFree(edges);CHKERRQ(ierr); 436 437 /* map interface's subsets */ 438 max_subset_size = 0; 439 for (i=0;i<n_all_cc;i++) { 440 PetscInt subset_size; 441 ierr = ISGetLocalSize(all_cc[i],&subset_size);CHKERRQ(ierr); 442 max_subset_size = PetscMax(max_subset_size,subset_size); 443 } 444 ierr = PetscMalloc5(max_subset_size,&auxmapping, 445 graph->ncc,&auxlocal_sequential, 446 graph->ncc,&auxlocal_parallel, 447 graph->ncc,&index_sequential, 448 graph->ncc,&index_parallel);CHKERRQ(ierr); 449 450 /* if threshold is negative, uses all sequential problems */ 451 if (pcbddc->deluxe_threshold < 0) pcbddc->deluxe_threshold = max_subset_size; 452 453 /* workspace */ 454 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 455 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs);CHKERRQ(ierr); 456 for (i=0;i<pcis->n-pcis->n_B;i++) { 457 ierr = PetscBTSet(bitmask,idxs[i]);CHKERRQ(ierr); 458 } 459 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs);CHKERRQ(ierr); 460 461 /* determine which problem has to be solved in parallel or sequentially */ 462 n_local_sequential_problems = 0; 463 n_local_parallel_problems = 0; 464 for (i=0;i<n_all_cc;i++) { 465 PetscInt subset_size,j,min_loc = 0; 466 467 ierr = ISGetLocalSize(all_cc[i],&subset_size);CHKERRQ(ierr); 468 ierr = ISGetIndices(all_cc[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 469 for (j=0;j<subset_size;j++) { 470 ierr = PetscBTSet(bitmask,idxs[j]);CHKERRQ(ierr); 471 } 472 ierr = ISLocalToGlobalMappingApply(graph->l2gmap,subset_size,idxs,auxmapping);CHKERRQ(ierr); 473 for (j=1;j<subset_size;j++) { 474 if (auxmapping[j]<auxmapping[min_loc]) { 475 min_loc = j; 476 } 477 } 478 if (subset_size > pcbddc->deluxe_threshold) { 479 index_parallel[n_local_parallel_problems] = i; 480 auxlocal_parallel[n_local_parallel_problems] = idxs[min_loc]; 481 n_local_parallel_problems++; 482 } else { 483 index_sequential[n_local_sequential_problems] = i; 484 auxlocal_sequential[n_local_sequential_problems] = idxs[min_loc]; 485 n_local_sequential_problems++; 486 } 487 ierr = ISRestoreIndices(all_cc[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 488 } 489 490 /* diagonal scaling on interface dofs not contained in cc */ 491 deluxe_ctx->n_simple = 0; 492 for (i=0;i<pcis->n;i++) { 493 if (!PetscBTLookup(bitmask,i)) { 494 deluxe_ctx->n_simple++; 495 } 496 } 497 ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 498 deluxe_ctx->n_simple = 0; 499 for (i=0;i<pcis->n;i++) { 500 if (!PetscBTLookup(bitmask,i)) { 501 deluxe_ctx->idx_simple_B[deluxe_ctx->n_simple++] = i; 502 } 503 } 504 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B,&i,deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 505 if (i != deluxe_ctx->n_simple) { 506 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simple scaling dofs! %d != %d",i,deluxe_ctx->n_simple); 507 } 508 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 509 510 /* SetUp local schur complements on subsets TODO better reuse procedure */ 511 if (!sub_schurs->n_subs) { 512 Mat S_j; 513 PetscBool free_used_adj; 514 PetscInt *used_xadj,*used_adjncy; 515 516 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 517 free_used_adj = PETSC_FALSE; 518 if (pcbddc->deluxe_layers == -1) { 519 used_xadj = NULL; 520 used_adjncy = NULL; 521 } else { 522 if ((pcbddc->deluxe_use_useradj && pcbddc->mat_graph->xadj) || !pcbddc->deluxe_compute_rowadj) { 523 used_xadj = pcbddc->mat_graph->xadj; 524 used_adjncy = pcbddc->mat_graph->adjncy; 525 } else { 526 Mat mat_adj; 527 PetscBool flg_row=PETSC_TRUE; 528 const PetscInt *xadj,*adjncy; 529 PetscInt nvtxs; 530 531 ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 532 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 533 if (!flg_row) { 534 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 535 } 536 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 537 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 538 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 539 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 540 if (!flg_row) { 541 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 542 } 543 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 544 free_used_adj = PETSC_TRUE; 545 } 546 } 547 548 /* Create Schur complement matrix */ 549 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 550 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 551 552 /* setup Schur complements on subsets */ 553 ierr = PCBDDCSubSchursSetUp(sub_schurs,S_j,pcis->is_I_local,pcis->is_B_local,n_all_cc,all_cc,used_xadj,used_adjncy,pcbddc->deluxe_layers);CHKERRQ(ierr); 554 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 555 /* free adjacency */ 556 if (free_used_adj) { 557 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 558 } 559 } 560 for (i=0;i<n_all_cc;i++) { 561 ierr = ISDestroy(&all_cc[i]);CHKERRQ(ierr); 562 } 563 ierr = PetscFree(all_cc);CHKERRQ(ierr); 564 565 /* Number parallel problems */ 566 auxglobal_parallel = 0; 567 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)pc),graph->l2gmap,n_local_parallel_problems,auxlocal_parallel,PETSC_NULL,&n_parallel_problems,&auxglobal_parallel);CHKERRQ(ierr); 568 if (pcbddc->dbg_flag) { 569 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Deluxe global number of parallel subproblems: %d\n",n_parallel_problems); 570 } 571 572 /* Compute data structures to solve parallel problems */ 573 ierr = PCBDDCScalingSetUp_Deluxe_Par(pc,n_local_parallel_problems,n_parallel_problems,auxglobal_parallel,index_parallel);CHKERRQ(ierr); 574 ierr = PetscFree(auxglobal_parallel);CHKERRQ(ierr); 575 576 577 /* Number sequential problems */ 578 auxglobal_sequential = 0; 579 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)pc),graph->l2gmap,n_local_sequential_problems,auxlocal_sequential,PETSC_NULL,&n_sequential_problems,&auxglobal_sequential);CHKERRQ(ierr); 580 if (pcbddc->dbg_flag) { 581 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Deluxe global number of sequential subproblems: %d\n",n_sequential_problems); 582 } 583 584 /* Compute data structures to solve sequential problems */ 585 ierr = PCBDDCScalingSetUp_Deluxe_Seq(pc,n_local_sequential_problems,n_sequential_problems,auxglobal_sequential,index_sequential);CHKERRQ(ierr); 586 ierr = PetscFree(auxglobal_sequential);CHKERRQ(ierr); 587 588 /* free workspace */ 589 ierr = PetscFree5(auxmapping,auxlocal_sequential,auxlocal_parallel,index_sequential,index_parallel);CHKERRQ(ierr); 590 #else 591 592 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 593 free_used_adj = PETSC_FALSE; 594 if (pcbddc->deluxe_layers == -1) { 595 used_xadj = NULL; 596 used_adjncy = NULL; 597 } else { 598 if ((pcbddc->deluxe_use_useradj && pcbddc->mat_graph->xadj) || !pcbddc->deluxe_compute_rowadj) { 599 used_xadj = pcbddc->mat_graph->xadj; 600 used_adjncy = pcbddc->mat_graph->adjncy; 601 } else { 602 Mat mat_adj; 603 PetscBool flg_row=PETSC_TRUE; 604 const PetscInt *xadj,*adjncy; 605 PetscInt nvtxs; 606 607 ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 608 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 609 if (!flg_row) { 610 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 611 } 612 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 613 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 614 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 615 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 616 if (!flg_row) { 617 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 618 } 619 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 620 free_used_adj = PETSC_TRUE; 621 } 622 } 623 624 625 /* Create Schur complement matrix */ 626 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 627 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 628 629 /* sub_schurs init */ /* TODO reuse adaptive one if valid (i.e. pcbddc->local_mat == matis->A and same graph info (HOW?) ) */ 630 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->deluxe_threshold);CHKERRQ(ierr); 631 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 632 ierr = PCBDDCSubSchursSetUpNew(sub_schurs,used_xadj,used_adjncy,pcbddc->deluxe_layers);CHKERRQ(ierr); 633 634 /* Compute data structures to solve parallel problems */ 635 ierr = PCBDDCScalingSetUp_Deluxe_Par(pc,sub_schurs->n_subs_par,sub_schurs->n_subs_par_g, 636 sub_schurs->auxglobal_parallel, 637 sub_schurs->index_parallel);CHKERRQ(ierr); 638 /* Compute data structures to solve sequential problems */ 639 ierr = PCBDDCScalingSetUp_Deluxe_Seq_New(pc);CHKERRQ(ierr); 640 641 /* free adjacency */ 642 if (free_used_adj) { 643 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 644 } 645 646 /* diagonal scaling on interface dofs not contained in cc */ 647 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 648 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 649 for (i=0;i<pcis->n-pcis->n_B;i++) { 650 ierr = PetscBTSet(bitmask,idxs[i]);CHKERRQ(ierr); 651 } 652 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 653 654 for (i=0;i<sub_schurs->n_subs;i++) { 655 PetscInt subset_size,j; 656 657 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 658 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 659 for (j=0;j<subset_size;j++) { 660 ierr = PetscBTSet(bitmask,idxs[j]);CHKERRQ(ierr); 661 } 662 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 663 } 664 665 deluxe_ctx->n_simple = 0; 666 for (i=0;i<pcis->n;i++) { 667 if (!PetscBTLookup(bitmask,i)) { 668 deluxe_ctx->n_simple++; 669 } 670 } 671 ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 672 deluxe_ctx->n_simple = 0; 673 for (i=0;i<pcis->n;i++) { 674 if (!PetscBTLookup(bitmask,i)) { 675 deluxe_ctx->idx_simple_B[deluxe_ctx->n_simple++] = i; 676 } 677 } 678 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B,&i,deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 679 if (i != deluxe_ctx->n_simple) { 680 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simple scaling dofs! %d != %d",i,deluxe_ctx->n_simple); 681 } 682 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 683 684 #endif 685 /* free graph struct */ 686 if (pcbddc->deluxe_rebuild) { 687 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 688 } 689 PetscFunctionReturn(0); 690 } 691 692 #undef __FUNCT__ 693 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Par" 694 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Par(PC pc, PetscInt n_local_parallel_problems,PetscInt n_parallel_problems,PetscInt global_parallel[],PetscInt index_parallel[]) 695 { 696 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 697 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 698 /* coloring */ 699 Mat parallel_problems; 700 MatColoring coloring_obj; 701 ISColoring coloring_parallel_problems; 702 IS *par_is_colors,*is_colors; 703 /* working stuff */ 704 PetscInt i,j; 705 PetscErrorCode ierr; 706 707 PetscFunctionBegin; 708 if (!n_parallel_problems) { 709 PetscFunctionReturn(0); 710 } 711 /* Color parallel subproblems */ 712 ierr = MatCreate(PetscObjectComm((PetscObject)pc),¶llel_problems);CHKERRQ(ierr); 713 ierr = MatSetSizes(parallel_problems,PETSC_DECIDE,PETSC_DECIDE,n_parallel_problems,n_parallel_problems);CHKERRQ(ierr); 714 ierr = MatSetType(parallel_problems,MATAIJ);CHKERRQ(ierr); 715 ierr = MatSetUp(parallel_problems);CHKERRQ(ierr); 716 ierr = MatSetOption(parallel_problems,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 717 ierr = MatSetOption(parallel_problems,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 718 for (i=0;i<n_local_parallel_problems;i++) { 719 PetscInt row = global_parallel[i]; 720 for (j=0;j<n_local_parallel_problems;j++) { 721 PetscInt col = global_parallel[j]; 722 if (row != col) { 723 ierr = MatSetValue(parallel_problems,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 724 } 725 } 726 } 727 ierr = MatAssemblyBegin(parallel_problems,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 728 ierr = MatAssemblyEnd(parallel_problems,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 729 if (pcbddc->dbg_flag > 1) { 730 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 731 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Adj matrix for deluxe parallel problems\n");CHKERRQ(ierr); 732 ierr = MatView(parallel_problems,pcbddc->dbg_viewer);CHKERRQ(ierr); 733 } 734 ierr = MatColoringCreate(parallel_problems,&coloring_obj);CHKERRQ(ierr); 735 ierr = MatColoringSetDistance(coloring_obj,1);CHKERRQ(ierr); 736 ierr = MatColoringSetType(coloring_obj,MATCOLORINGJP);CHKERRQ(ierr); 737 ierr = MatColoringApply(coloring_obj,&coloring_parallel_problems);CHKERRQ(ierr); 738 ierr = ISColoringGetIS(coloring_parallel_problems,&deluxe_ctx->par_colors,&par_is_colors);CHKERRQ(ierr); 739 if (pcbddc->dbg_flag) { 740 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 741 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Number of colors %d for parallel part of deluxe\n",deluxe_ctx->par_colors);CHKERRQ(ierr); 742 } 743 744 /* all procs should know the color distribution */ 745 ierr = PetscMalloc1(deluxe_ctx->par_colors,&is_colors);CHKERRQ(ierr); 746 for (i=0;i<deluxe_ctx->par_colors;i++) { 747 if (pcbddc->dbg_flag) { 748 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Global problem indexes for color %d\n",i);CHKERRQ(ierr); 749 ierr = ISView(par_is_colors[i],pcbddc->dbg_viewer);CHKERRQ(ierr); 750 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 751 } 752 ierr = ISAllGather(par_is_colors[i],&is_colors[i]);CHKERRQ(ierr); 753 } 754 755 /* free unneeded objects */ 756 ierr = ISColoringRestoreIS(coloring_parallel_problems,&par_is_colors);CHKERRQ(ierr); 757 ierr = ISColoringDestroy(&coloring_parallel_problems);CHKERRQ(ierr); 758 ierr = MatColoringDestroy(&coloring_obj);CHKERRQ(ierr); 759 ierr = MatDestroy(¶llel_problems);CHKERRQ(ierr); 760 761 /* allocate deluxe arrays for parallel problems */ 762 ierr = PetscMalloc5(deluxe_ctx->par_colors,&deluxe_ctx->par_ksp, 763 deluxe_ctx->par_colors,&deluxe_ctx->par_scctx_s, 764 deluxe_ctx->par_colors,&deluxe_ctx->par_scctx_p, 765 deluxe_ctx->par_colors,&deluxe_ctx->par_vec, 766 deluxe_ctx->par_colors,&deluxe_ctx->par_col2sub);CHKERRQ(ierr); 767 768 /* cycle on colors */ 769 for (i=0;i<deluxe_ctx->par_colors;i++) { 770 PetscSubcomm par_subcomm; 771 const PetscInt* idxs_subproblems; 772 PetscInt color_size; 773 PetscMPIInt rank,active_color; 774 775 /* get local index of i-th parallel colored problem */ 776 ierr = ISGetLocalSize(is_colors[i],&color_size);CHKERRQ(ierr); 777 ierr = ISGetIndices(is_colors[i],&idxs_subproblems);CHKERRQ(ierr); 778 /* split comm for computing parallel problems for this color */ 779 /* Processes not partecipating at this stage will have color = color_size */ 780 /* because PetscCommDuplicate does not handle MPI_COMM_NULL */ 781 active_color = color_size; 782 deluxe_ctx->par_col2sub[i] = -1; 783 for (j=0;j<n_local_parallel_problems;j++) { 784 PetscInt local_idx; 785 ierr = PetscFindInt(global_parallel[j],color_size,idxs_subproblems,&local_idx);CHKERRQ(ierr); 786 if (local_idx > -1) { 787 ierr = PetscMPIIntCast(local_idx,&active_color);CHKERRQ(ierr); 788 deluxe_ctx->par_col2sub[i] = index_parallel[j]; 789 break; 790 } 791 } 792 ierr = ISRestoreIndices(is_colors[i],&idxs_subproblems);CHKERRQ(ierr); 793 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)pc),&par_subcomm);CHKERRQ(ierr); 794 ierr = PetscSubcommSetNumber(par_subcomm,color_size+1);CHKERRQ(ierr); 795 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 796 ierr = PetscSubcommSetTypeGeneral(par_subcomm,active_color,rank);CHKERRQ(ierr); 797 /* print debug info */ 798 if (pcbddc->dbg_flag) { 799 PetscMPIInt crank,csize; 800 ierr = MPI_Comm_rank(par_subcomm->comm,&crank);CHKERRQ(ierr); 801 ierr = MPI_Comm_size(par_subcomm->comm,&csize);CHKERRQ(ierr); 802 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Color %d: size %d, details follows.\n",i,color_size);CHKERRQ(ierr); 803 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 804 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 805 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Subdomain %d: color in subcomm %d (rank %d out of %d) (lidx %d)\n",PetscGlobalRank,par_subcomm->color,crank,csize,deluxe_ctx->par_col2sub[i]);CHKERRQ(ierr); 806 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 807 } 808 809 if (deluxe_ctx->par_col2sub[i] >= 0) { 810 PC pctemp; 811 PC_IS *pcis=(PC_IS*)pc->data; 812 Mat color_mat,color_mat_is,temp_mat; 813 ISLocalToGlobalMapping WtoNmap,l2gmap_subset; 814 IS is_local_numbering,isB_local,isW_local,isW; 815 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs[1]; 816 PetscInt subidx,n_local_dofs,n_global_dofs; 817 PetscInt *global_numbering,*local_numbering; 818 char ksp_prefix[256]; 819 size_t len; 820 821 /* Local index for schur complement on subset */ 822 subidx = deluxe_ctx->par_col2sub[i]; 823 824 /* Parallel numbering for dofs in colored subset */ 825 ierr = ISSum(sub_schurs->is_I_layer,sub_schurs->is_AEj_B[subidx],&is_local_numbering);CHKERRQ(ierr); 826 ierr = ISGetLocalSize(is_local_numbering,&n_local_dofs);CHKERRQ(ierr); 827 ierr = ISGetIndices(is_local_numbering,(const PetscInt **)&local_numbering);CHKERRQ(ierr); 828 ierr = PCBDDCSubsetNumbering(par_subcomm->comm,pcbddc->mat_graph->l2gmap,n_local_dofs,local_numbering,PETSC_NULL,&n_global_dofs,&global_numbering);CHKERRQ(ierr); 829 ierr = ISRestoreIndices(is_local_numbering,(const PetscInt **)&local_numbering);CHKERRQ(ierr); 830 831 /* L2Gmap from relevant dofs to local dofs */ 832 ierr = ISLocalToGlobalMappingCreateIS(is_local_numbering,&WtoNmap);CHKERRQ(ierr); 833 834 /* L2Gmap from local to global dofs */ 835 ierr = ISLocalToGlobalMappingCreate(par_subcomm->comm,1,n_local_dofs,global_numbering,PETSC_COPY_VALUES,&l2gmap_subset);CHKERRQ(ierr); 836 837 /* compute parallel matrix (extended dirichlet problem on subset) */ 838 ierr = MatCreateIS(par_subcomm->comm,1,PETSC_DECIDE,PETSC_DECIDE,n_global_dofs,n_global_dofs,l2gmap_subset,&color_mat_is);CHKERRQ(ierr); 839 ierr = MatGetSubMatrix(pcbddc->local_mat,is_local_numbering,is_local_numbering,MAT_INITIAL_MATRIX,&temp_mat);CHKERRQ(ierr); 840 ierr = MatISSetLocalMat(color_mat_is,temp_mat);CHKERRQ(ierr); 841 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 842 ierr = MatISGetMPIXAIJ(color_mat_is,MAT_INITIAL_MATRIX,&color_mat);CHKERRQ(ierr); 843 ierr = MatDestroy(&color_mat_is);CHKERRQ(ierr); 844 845 /* work vector for (parallel) extended dirichlet problem */ 846 ierr = MatCreateVecs(color_mat,&deluxe_ctx->par_vec[i],NULL);CHKERRQ(ierr); 847 848 /* compute scatters */ 849 /* deluxe_ctx->par_scctx_p[i] extension from local subset to extended dirichlet problem 850 deluxe_ctx->par_scctx_s[i] restriction from local boundary to subset -> simple copy of selected values */ 851 ierr = ISGlobalToLocalMappingApplyIS(pcis->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[subidx],&isB_local);CHKERRQ(ierr); 852 ierr = VecScatterCreate(pcbddc->work_scaling,isB_local,sub_schurs->work1[subidx],NULL,&deluxe_ctx->par_scctx_s[i]);CHKERRQ(ierr); 853 ierr = ISGlobalToLocalMappingApplyIS(WtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[subidx],&isW_local);CHKERRQ(ierr); 854 ierr = ISLocalToGlobalMappingApplyIS(l2gmap_subset,isW_local,&isW);CHKERRQ(ierr); 855 ierr = VecScatterCreate(sub_schurs->work1[subidx],NULL,deluxe_ctx->par_vec[i],isW,&deluxe_ctx->par_scctx_p[i]);CHKERRQ(ierr); 856 857 /* free objects no longer neeeded */ 858 ierr = ISDestroy(&isW);CHKERRQ(ierr); 859 ierr = ISDestroy(&isW_local);CHKERRQ(ierr); 860 ierr = ISDestroy(&isB_local);CHKERRQ(ierr); 861 ierr = ISLocalToGlobalMappingDestroy(&WtoNmap);CHKERRQ(ierr); 862 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subset);CHKERRQ(ierr); 863 ierr = ISDestroy(&is_local_numbering);CHKERRQ(ierr); 864 ierr = PetscFree(global_numbering);CHKERRQ(ierr); 865 866 /* KSP for extended dirichlet problem */ 867 ierr = KSPCreate(par_subcomm->comm,&deluxe_ctx->par_ksp[i]);CHKERRQ(ierr); 868 ierr = KSPSetOperators(deluxe_ctx->par_ksp[i],color_mat,color_mat);CHKERRQ(ierr); 869 ierr = KSPSetTolerances(deluxe_ctx->par_ksp[i],1.e-12,1.e-12,1.e10,10000);CHKERRQ(ierr); 870 ierr = KSPSetType(deluxe_ctx->par_ksp[i],KSPPREONLY);CHKERRQ(ierr); 871 ierr = KSPGetPC(deluxe_ctx->par_ksp[i],&pctemp);CHKERRQ(ierr); 872 ierr = PCSetType(pctemp,PCREDUNDANT);CHKERRQ(ierr); 873 ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr); 874 len -= 10; /* remove "dirichlet_" */ 875 ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len+1);CHKERRQ(ierr); /* PetscStrncpy puts a terminating char at the end */ 876 ierr = PetscStrcat(ksp_prefix,"deluxe_par_");CHKERRQ(ierr); 877 ierr = KSPSetOptionsPrefix(deluxe_ctx->par_ksp[i],ksp_prefix);CHKERRQ(ierr); 878 ierr = KSPSetFromOptions(deluxe_ctx->par_ksp[i]);CHKERRQ(ierr); 879 ierr = KSPSetUp(deluxe_ctx->par_ksp[i]);CHKERRQ(ierr); 880 ierr = MatDestroy(&color_mat);CHKERRQ(ierr); 881 } else { /* not partecipating in color */ 882 deluxe_ctx->par_ksp[i] = 0; 883 deluxe_ctx->par_vec[i] = 0; 884 deluxe_ctx->par_scctx_p[i] = 0; 885 deluxe_ctx->par_scctx_s[i] = 0; 886 } 887 ierr = PetscSubcommDestroy(&par_subcomm);CHKERRQ(ierr); 888 } 889 for (i=0;i<deluxe_ctx->par_colors;i++) { 890 ierr = ISDestroy(&is_colors[i]);CHKERRQ(ierr); 891 } 892 ierr = PetscFree(is_colors);CHKERRQ(ierr); 893 894 if (pcbddc->dbg_flag) { 895 Vec test_vec; 896 PetscReal error; 897 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs[1]; 898 /* test partition of unity of coloured schur complements */ 899 for (i=0;i<deluxe_ctx->par_colors;i++) { 900 PetscInt subidx = deluxe_ctx->par_col2sub[i]; 901 PetscBool error_found = PETSC_FALSE; 902 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 903 904 if (deluxe_ctx->par_ksp[i]) { 905 /* create random test vec being zero on internal nodes of the extende dirichlet problem */ 906 ierr = VecDuplicate(deluxe_ctx->par_vec[i],&test_vec);CHKERRQ(ierr); 907 ierr = VecSetRandom(sub_schurs->work1[subidx],PETSC_NULL);CHKERRQ(ierr); 908 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 909 ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 910 ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 911 /* w_j */ 912 ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],test_vec,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 913 ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],test_vec,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 914 /* S_j*w_j */ 915 ierr = MatMult(sub_schurs->S_Ej[subidx],sub_schurs->work1[subidx],sub_schurs->work2[subidx]);CHKERRQ(ierr); 916 /* \sum_j S_j*w_j */ 917 ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr); 918 ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 919 ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 920 /* (\sum_j S_j)^(-1)(\sum_j S_j*w_j) */ 921 ierr = KSPSolve(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr); 922 ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 923 ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 924 ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr); 925 ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 926 ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 927 /* test partition of unity */ 928 ierr = VecAXPY(test_vec,-1.0,deluxe_ctx->par_vec[i]);CHKERRQ(ierr); 929 ierr = VecNorm(test_vec,NORM_INFINITY,&error);CHKERRQ(ierr); 930 if (PetscAbsReal(error) > 1.e-2) { 931 /* ierr = VecView(test_vec,0);CHKERRQ(ierr); */ 932 error_found = PETSC_TRUE; 933 } 934 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 935 } 936 if (error_found) { 937 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Error testing local schur for color %d and subdomain %d\n",i,PetscGlobalRank);CHKERRQ(ierr); 938 } 939 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 940 } 941 } 942 PetscFunctionReturn(0); 943 } 944 945 #undef __FUNCT__ 946 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Seq_New" 947 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq_New(PC pc) 948 { 949 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 950 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 951 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs[1]; 952 PC pc_temp; 953 MatSolverPackage solver=NULL; 954 char ksp_prefix[256]; 955 size_t len; 956 PetscInt local_size; 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 if (!sub_schurs->n_subs_seq_g) { 961 PetscFunctionReturn(0); 962 } 963 964 /* Create work vectors for sequential part of deluxe */ 965 ierr = MatCreateVecs(sub_schurs->S_Ej_all,&deluxe_ctx->seq_work1,&deluxe_ctx->seq_work2);CHKERRQ(ierr); 966 967 /* Compute deluxe sequential scatter */ 968 ierr = VecScatterCreate(pcbddc->work_scaling,sub_schurs->is_Ej_all,deluxe_ctx->seq_work1,NULL,&deluxe_ctx->seq_scctx);CHKERRQ(ierr); 969 970 /* Create KSP object for sequential part of deluxe scaling */ 971 ierr = KSPCreate(PETSC_COMM_SELF,&deluxe_ctx->seq_ksp);CHKERRQ(ierr); 972 ierr = KSPSetOperators(deluxe_ctx->seq_ksp,sub_schurs->sum_S_Ej_all,sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 973 ierr = KSPSetType(deluxe_ctx->seq_ksp,KSPPREONLY);CHKERRQ(ierr); 974 ierr = KSPGetPC(deluxe_ctx->seq_ksp,&pc_temp);CHKERRQ(ierr); 975 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 976 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 977 ierr = PCFactorGetMatSolverPackage(pc_temp,(const MatSolverPackage*)&solver);CHKERRQ(ierr); 978 ierr = MatGetSize(sub_schurs->sum_S_Ej_all,&local_size,NULL);CHKERRQ(ierr); 979 if (solver && local_size) { /* if local_size is null, some external packages will report errors */ 980 PC new_pc; 981 PCType type; 982 ierr = PCGetType(pc_temp,&type);CHKERRQ(ierr); 983 ierr = KSPGetPC(deluxe_ctx->seq_ksp,&new_pc);CHKERRQ(ierr); 984 ierr = PCSetType(new_pc,type);CHKERRQ(ierr); 985 ierr = PCFactorSetMatSolverPackage(new_pc,solver);CHKERRQ(ierr); 986 } 987 ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr); 988 len -= 10; /* remove "dirichlet_" */ 989 ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len+1);CHKERRQ(ierr); 990 ierr = PetscStrcat(ksp_prefix,"deluxe_seq_");CHKERRQ(ierr); 991 ierr = KSPSetOptionsPrefix(deluxe_ctx->seq_ksp,ksp_prefix);CHKERRQ(ierr); 992 if (local_size) { 993 ierr = KSPSetFromOptions(deluxe_ctx->seq_ksp);CHKERRQ(ierr); 994 } 995 ierr = KSPSetUp(deluxe_ctx->seq_ksp);CHKERRQ(ierr); 996 PetscFunctionReturn(0); 997 } 998 999 #undef __FUNCT__ 1000 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Seq" 1001 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq(PC pc,PetscInt n_local_sequential_problems,PetscInt n_sequential_problems,PetscInt global_sequential[],PetscInt local_sequential[]) 1002 { 1003 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 1004 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 1005 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs[1]; 1006 ISLocalToGlobalMapping l2gmap_subsets; 1007 Mat global_schur_subsets,*submat_global_schur_subsets,work_mat; 1008 IS is_to,is_from; 1009 PetscScalar *fill_vals; 1010 PetscInt *nnz,*all_local_idx_G,*all_local_idx_B,*all_local_idx_N,*all_permutation_G,*dummy_idx; 1011 PetscInt i,j,local_problem_index; 1012 PetscInt subset_size,max_subset_size; 1013 PetscInt local_size,global_size; 1014 PC pc_temp; 1015 MatSolverPackage solver=NULL; 1016 char ksp_prefix[256]; 1017 size_t len; 1018 PetscErrorCode ierr; 1019 1020 PetscFunctionBegin; 1021 if (!n_sequential_problems) { 1022 PetscFunctionReturn(0); 1023 } 1024 1025 /* Get info on subset sizes and sum of all subsets sizes */ 1026 max_subset_size = 0; 1027 local_size = 0; 1028 for (i=0;i<n_local_sequential_problems;i++) { 1029 local_problem_index = local_sequential[i]; 1030 ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr); 1031 max_subset_size = PetscMax(subset_size,max_subset_size); 1032 local_size += subset_size; 1033 } 1034 1035 /* Work arrays for local indices */ 1036 ierr = PetscMalloc1(local_size,&all_local_idx_B);CHKERRQ(ierr); 1037 ierr = PetscMalloc1(local_size,&all_local_idx_N);CHKERRQ(ierr); 1038 ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr); 1039 1040 /* Get local indices in local whole numbering and local boundary numbering */ 1041 local_size = 0; 1042 for (i=0;i<n_local_sequential_problems;i++) { 1043 PC_IS *pcis=(PC_IS*)pc->data; 1044 PetscInt *idxs; 1045 /* get info on local problem */ 1046 local_problem_index = local_sequential[i]; 1047 ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr); 1048 ierr = ISGetIndices(sub_schurs->is_AEj_B[local_problem_index],(const PetscInt**)&idxs);CHKERRQ(ierr); 1049 /* subset indices in local numbering */ 1050 ierr = PetscMemcpy(all_local_idx_N+local_size,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1051 /* subset indices in local boundary numbering */ 1052 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,subset_size,idxs,&j,&all_local_idx_B[local_size]);CHKERRQ(ierr); 1053 ierr = ISRestoreIndices(sub_schurs->is_AEj_B[local_problem_index],(const PetscInt**)&idxs);CHKERRQ(ierr); 1054 if (j != subset_size) { 1055 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in BDDC deluxe serial %d (BtoNmap)! %d != %d\n",local_problem_index,subset_size,j); 1056 } 1057 for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size; 1058 local_size += subset_size; 1059 } 1060 1061 /* Number dofs on all subsets (parallel) and sort numbering */ 1062 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)pc),pcbddc->mat_graph->l2gmap,local_size,all_local_idx_N,PETSC_NULL,&global_size,&all_local_idx_G);CHKERRQ(ierr); 1063 ierr = PetscMalloc1(local_size,&all_permutation_G);CHKERRQ(ierr); 1064 for (i=0;i<local_size;i++) { 1065 all_permutation_G[i]=i; 1066 } 1067 ierr = PetscSortIntWithPermutation(local_size,all_local_idx_G,all_permutation_G);CHKERRQ(ierr); 1068 1069 /* Local matrix of all local Schur on subsets */ 1070 ierr = MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);CHKERRQ(ierr); 1071 ierr = MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr); 1072 ierr = MatSetType(sub_schurs->S_Ej_all,MATAIJ);CHKERRQ(ierr); 1073 ierr = MatSetOption(sub_schurs->S_Ej_all,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1074 ierr = MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);CHKERRQ(ierr); 1075 ierr = PetscFree(nnz);CHKERRQ(ierr); 1076 1077 /* Work arrays */ 1078 ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&fill_vals);CHKERRQ(ierr); 1079 1080 /* Loop on local problems to compute Schur complements explicitly */ 1081 local_size = 0; 1082 for (i=0;i<n_local_sequential_problems;i++) { 1083 /* get info on local problem */ 1084 local_problem_index = local_sequential[i]; 1085 ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr); 1086 /* local Schur */ 1087 for (j=0;j<subset_size;j++) { 1088 ierr = VecSet(sub_schurs->work1[local_problem_index],0.0);CHKERRQ(ierr); 1089 ierr = VecSetValue(sub_schurs->work1[local_problem_index],j,1.0,INSERT_VALUES);CHKERRQ(ierr); 1090 ierr = VecPlaceArray(sub_schurs->work2[local_problem_index],&fill_vals[j*subset_size]);CHKERRQ(ierr); 1091 ierr = MatMult(sub_schurs->S_Ej[local_problem_index],sub_schurs->work1[local_problem_index],sub_schurs->work2[local_problem_index]);CHKERRQ(ierr); 1092 ierr = VecResetArray(sub_schurs->work2[local_problem_index]);CHKERRQ(ierr); 1093 } 1094 for (j=0;j<subset_size;j++) { 1095 dummy_idx[j]=local_size+j; 1096 } 1097 ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,fill_vals,INSERT_VALUES);CHKERRQ(ierr); 1098 local_size += subset_size; 1099 } 1100 ierr = PetscFree2(dummy_idx,fill_vals);CHKERRQ(ierr); 1101 ierr = MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1102 ierr = MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1103 1104 /* Global matrix of all assembled Schur on subsets */ 1105 ierr = ISLocalToGlobalMappingCreate(PetscObjectComm((PetscObject)pc),1,local_size,all_local_idx_G,PETSC_COPY_VALUES,&l2gmap_subsets);CHKERRQ(ierr); 1106 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,&work_mat);CHKERRQ(ierr); 1107 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subsets);CHKERRQ(ierr); 1108 ierr = MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);CHKERRQ(ierr); 1109 ierr = MatISGetMPIXAIJ(work_mat,MAT_INITIAL_MATRIX,&global_schur_subsets);CHKERRQ(ierr); 1110 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1111 /* Create work vectors for sequential part of deluxe */ 1112 ierr = MatCreateVecs(sub_schurs->S_Ej_all,&deluxe_ctx->seq_work1,&deluxe_ctx->seq_work2);CHKERRQ(ierr); 1113 1114 /* Compute deluxe sequential scatter */ 1115 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&is_from);CHKERRQ(ierr); 1116 ierr = VecScatterCreate(pcbddc->work_scaling,is_from,deluxe_ctx->seq_work1,NULL,&deluxe_ctx->seq_scctx);CHKERRQ(ierr); 1117 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1118 1119 /* Get local part of (\sum_j S_Ej) */ 1120 for (i=0;i<local_size;i++) { 1121 all_local_idx_N[i] = all_local_idx_G[all_permutation_G[i]]; 1122 } 1123 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),local_size,all_local_idx_N,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1124 ierr = MatGetSubMatrices(global_schur_subsets,1,&is_to,&is_to,MAT_INITIAL_MATRIX,&submat_global_schur_subsets);CHKERRQ(ierr); 1125 ierr = MatDestroy(&global_schur_subsets);CHKERRQ(ierr); 1126 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1127 for (i=0;i<local_size;i++) { 1128 all_local_idx_G[all_permutation_G[i]] = i; 1129 } 1130 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_G,PETSC_OWN_POINTER,&is_from);CHKERRQ(ierr); 1131 ierr = ISSetPermutation(is_from);CHKERRQ(ierr); 1132 ierr = MatPermute(submat_global_schur_subsets[0],is_from,is_from,&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 1133 ierr = MatDestroyMatrices(1,&submat_global_schur_subsets);CHKERRQ(ierr); 1134 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1135 ierr = PetscFree(all_permutation_G);CHKERRQ(ierr); 1136 1137 /* Create KSP object for sequential part of deluxe scaling */ 1138 ierr = KSPCreate(PETSC_COMM_SELF,&deluxe_ctx->seq_ksp);CHKERRQ(ierr); 1139 ierr = KSPSetOperators(deluxe_ctx->seq_ksp,sub_schurs->sum_S_Ej_all,sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 1140 ierr = KSPSetType(deluxe_ctx->seq_ksp,KSPPREONLY);CHKERRQ(ierr); 1141 ierr = KSPGetPC(deluxe_ctx->seq_ksp,&pc_temp);CHKERRQ(ierr); 1142 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1143 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1144 ierr = PCFactorGetMatSolverPackage(pc_temp,(const MatSolverPackage*)&solver);CHKERRQ(ierr); 1145 if (solver && local_size) { /* if local_size is null, some external packages will report errors */ 1146 PC new_pc; 1147 PCType type; 1148 ierr = PCGetType(pc_temp,&type);CHKERRQ(ierr); 1149 ierr = KSPGetPC(deluxe_ctx->seq_ksp,&new_pc);CHKERRQ(ierr); 1150 ierr = PCSetType(new_pc,type);CHKERRQ(ierr); 1151 ierr = PCFactorSetMatSolverPackage(new_pc,solver);CHKERRQ(ierr); 1152 } 1153 ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr); 1154 len -= 10; /* remove "dirichlet_" */ 1155 ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len+1);CHKERRQ(ierr); 1156 ierr = PetscStrcat(ksp_prefix,"deluxe_seq_");CHKERRQ(ierr); 1157 ierr = KSPSetOptionsPrefix(deluxe_ctx->seq_ksp,ksp_prefix);CHKERRQ(ierr); 1158 if (local_size) { 1159 ierr = KSPSetFromOptions(deluxe_ctx->seq_ksp);CHKERRQ(ierr); 1160 } 1161 ierr = KSPSetUp(deluxe_ctx->seq_ksp);CHKERRQ(ierr); 1162 PetscFunctionReturn(0); 1163 } 1164