1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 6 /* prototypes for deluxe functions */ 7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC); 8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC); 9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC); 10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC); 11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling); 12 13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X) 14 { 15 Mat_SeqDense *mat = (Mat_SeqDense*)A->data; 16 const PetscScalar *b; 17 PetscScalar *x; 18 PetscInt n; 19 PetscBLASInt nrhs,info,m; 20 PetscBool flg; 21 22 PetscFunctionBegin; 23 PetscCall(PetscBLASIntCast(A->rmap->n,&m)); 24 PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL)); 25 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 26 PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL)); 27 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 28 29 PetscCall(MatGetSize(B,NULL,&n)); 30 PetscCall(PetscBLASIntCast(n,&nrhs)); 31 PetscCall(MatDenseGetArrayRead(B,&b)); 32 PetscCall(MatDenseGetArray(X,&x)); 33 PetscCall(PetscArraycpy(x,b,m*nrhs)); 34 PetscCall(MatDenseRestoreArrayRead(B,&b)); 35 36 if (A->factortype == MAT_FACTOR_LU) { 37 PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info)); 38 PetscCheck(!info,PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve"); 39 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported"); 40 41 PetscCall(MatDenseRestoreArray(X,&x)); 42 PetscCall(PetscLogFlops(nrhs*(2.0*m*m - m))); 43 PetscFunctionReturn(0); 44 } 45 46 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) 47 { 48 PC_IS* pcis = (PC_IS*)pc->data; 49 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 50 51 PetscFunctionBegin; 52 /* Apply partition of unity */ 53 PetscCall(VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector)); 54 PetscCall(VecSet(global_vector,0.0)); 55 PetscCall(VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE)); 56 PetscCall(VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE)); 57 PetscFunctionReturn(0); 58 } 59 60 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) 61 { 62 PC_IS* pcis=(PC_IS*)pc->data; 63 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 64 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 65 66 PetscFunctionBegin; 67 PetscCall(VecSet(pcbddc->work_scaling,0.0)); 68 PetscCall(VecSet(y,0.0)); 69 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 70 PetscInt i; 71 const PetscScalar *array_x,*array_D; 72 PetscScalar *array; 73 PetscCall(VecGetArrayRead(x,&array_x)); 74 PetscCall(VecGetArrayRead(pcis->D,&array_D)); 75 PetscCall(VecGetArray(pcbddc->work_scaling,&array)); 76 for (i=0;i<deluxe_ctx->n_simple;i++) { 77 array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]]; 78 } 79 PetscCall(VecRestoreArray(pcbddc->work_scaling,&array)); 80 PetscCall(VecRestoreArrayRead(pcis->D,&array_D)); 81 PetscCall(VecRestoreArrayRead(x,&array_x)); 82 } 83 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 84 if (deluxe_ctx->seq_mat) { 85 PetscInt i; 86 for (i=0;i<deluxe_ctx->seq_n;i++) { 87 if (deluxe_ctx->change) { 88 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD)); 89 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD)); 90 if (deluxe_ctx->change_with_qr) { 91 Mat change; 92 93 PetscCall(KSPGetOperators(deluxe_ctx->change[i],&change,NULL)); 94 PetscCall(MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i])); 95 } else { 96 PetscCall(KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i])); 97 } 98 } else { 99 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD)); 100 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD)); 101 } 102 PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i])); 103 if (deluxe_ctx->seq_mat_inv_sum[i]) { 104 PetscScalar *x; 105 106 PetscCall(VecGetArray(deluxe_ctx->seq_work2[i],&x)); 107 PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i],x)); 108 PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i],&x)); 109 PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i])); 110 PetscCall(VecResetArray(deluxe_ctx->seq_work1[i])); 111 } 112 if (deluxe_ctx->change) { 113 Mat change; 114 115 PetscCall(KSPGetOperators(deluxe_ctx->change[i],&change,NULL)); 116 PetscCall(MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i])); 117 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE)); 118 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE)); 119 } else { 120 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE)); 121 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE)); 122 } 123 } 124 } 125 /* put local boundary part in global vector */ 126 PetscCall(VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE)); 127 PetscCall(VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE)); 128 PetscFunctionReturn(0); 129 } 130 131 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) 132 { 133 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 134 135 PetscFunctionBegin; 136 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 137 PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2); 138 PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3); 139 PetscCheckFalse(local_interface_vector == pcbddc->work_scaling,PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!"); 140 PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector)); 141 PetscFunctionReturn(0); 142 } 143 144 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) 145 { 146 PC_IS *pcis = (PC_IS*)pc->data; 147 148 PetscFunctionBegin; 149 PetscCall(VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD)); 150 PetscCall(VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD)); 151 /* Apply partition of unity */ 152 PetscCall(VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector)); 153 PetscFunctionReturn(0); 154 } 155 156 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) 157 { 158 PC_IS* pcis=(PC_IS*)pc->data; 159 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 160 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 161 162 PetscFunctionBegin; 163 /* get local boundary part of global vector */ 164 PetscCall(VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD)); 165 PetscCall(VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD)); 166 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 167 PetscInt i; 168 PetscScalar *array_y; 169 const PetscScalar *array_D; 170 PetscCall(VecGetArray(y,&array_y)); 171 PetscCall(VecGetArrayRead(pcis->D,&array_D)); 172 for (i=0;i<deluxe_ctx->n_simple;i++) { 173 array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]]; 174 } 175 PetscCall(VecRestoreArrayRead(pcis->D,&array_D)); 176 PetscCall(VecRestoreArray(y,&array_y)); 177 } 178 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 179 if (deluxe_ctx->seq_mat) { 180 PetscInt i; 181 for (i=0;i<deluxe_ctx->seq_n;i++) { 182 if (deluxe_ctx->change) { 183 Mat change; 184 185 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD)); 186 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD)); 187 PetscCall(KSPGetOperators(deluxe_ctx->change[i],&change,NULL)); 188 PetscCall(MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i])); 189 } else { 190 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD)); 191 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD)); 192 } 193 if (deluxe_ctx->seq_mat_inv_sum[i]) { 194 PetscScalar *x; 195 196 PetscCall(VecGetArray(deluxe_ctx->seq_work1[i],&x)); 197 PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i],x)); 198 PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i],&x)); 199 PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i])); 200 PetscCall(VecResetArray(deluxe_ctx->seq_work2[i])); 201 } 202 PetscCall(MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i])); 203 if (deluxe_ctx->change) { 204 if (deluxe_ctx->change_with_qr) { 205 Mat change; 206 207 PetscCall(KSPGetOperators(deluxe_ctx->change[i],&change,NULL)); 208 PetscCall(MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i])); 209 } else { 210 PetscCall(KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i])); 211 } 212 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE)); 213 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE)); 214 } else { 215 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE)); 216 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE)); 217 } 218 } 219 } 220 PetscFunctionReturn(0); 221 } 222 223 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) 224 { 225 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 226 227 PetscFunctionBegin; 228 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 229 PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2); 230 PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3); 231 PetscCheckFalse(local_interface_vector == pcbddc->work_scaling,PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!"); 232 PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector)); 233 PetscFunctionReturn(0); 234 } 235 236 PetscErrorCode PCBDDCScalingSetUp(PC pc) 237 { 238 PC_IS* pcis=(PC_IS*)pc->data; 239 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 240 241 PetscFunctionBegin; 242 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 243 PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0)); 244 /* create work vector for the operator */ 245 PetscCall(VecDestroy(&pcbddc->work_scaling)); 246 PetscCall(VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling)); 247 /* always rebuild pcis->D */ 248 if (pcis->use_stiffness_scaling) { 249 PetscScalar *a; 250 PetscInt i,n; 251 252 PetscCall(MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N)); 253 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD)); 254 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD)); 255 PetscCall(VecAbs(pcis->D)); 256 PetscCall(VecGetLocalSize(pcis->D,&n)); 257 PetscCall(VecGetArray(pcis->D,&a)); 258 for (i=0;i<n;i++) if (PetscAbsScalar(a[i])<PETSC_SMALL) a[i] = 1.0; 259 PetscCall(VecRestoreArray(pcis->D,&a)); 260 } 261 PetscCall(VecSet(pcis->vec1_global,0.0)); 262 PetscCall(VecScatterBegin(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 263 PetscCall(VecScatterEnd(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 264 PetscCall(VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 265 PetscCall(VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 266 PetscCall(VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B)); 267 /* now setup */ 268 if (pcbddc->use_deluxe_scaling) { 269 if (!pcbddc->deluxe_ctx) { 270 PetscCall(PCBDDCScalingCreate_Deluxe(pc)); 271 } 272 PetscCall(PCBDDCScalingSetUp_Deluxe(pc)); 273 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe)); 274 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe)); 275 } else { 276 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic)); 277 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic)); 278 } 279 PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0)); 280 281 /* test */ 282 if (pcbddc->dbg_flag) { 283 Mat B0_B = NULL; 284 Vec B0_Bv = NULL, B0_Bv2 = NULL; 285 Vec vec2_global; 286 PetscViewer viewer = pcbddc->dbg_viewer; 287 PetscReal error; 288 289 /* extension -> from local to parallel */ 290 PetscCall(VecSet(pcis->vec1_global,0.0)); 291 PetscCall(VecSetRandom(pcis->vec1_B,NULL)); 292 PetscCall(VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 293 PetscCall(VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 294 PetscCall(VecDuplicate(pcis->vec1_global,&vec2_global)); 295 PetscCall(VecCopy(pcis->vec1_global,vec2_global)); 296 PetscCall(VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 297 PetscCall(VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 298 if (pcbddc->benign_n) { 299 IS is_dummy; 300 301 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 302 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 303 PetscCall(ISDestroy(&is_dummy)); 304 PetscCall(MatCreateVecs(B0_B,NULL,&B0_Bv)); 305 PetscCall(VecDuplicate(B0_Bv,&B0_Bv2)); 306 PetscCall(MatMult(B0_B,pcis->vec1_B,B0_Bv)); 307 } 308 PetscCall(PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global)); 309 if (pcbddc->benign_saddle_point) { 310 PetscReal errorl = 0.; 311 PetscCall(VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 312 PetscCall(VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 313 if (pcbddc->benign_n) { 314 PetscCall(MatMult(B0_B,pcis->vec1_B,B0_Bv2)); 315 PetscCall(VecAXPY(B0_Bv,-1.0,B0_Bv2)); 316 PetscCall(VecNorm(B0_Bv,NORM_INFINITY,&errorl)); 317 } 318 PetscCallMPI(MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc))); 319 PetscCall(PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error)); 320 } 321 PetscCall(VecAXPY(pcis->vec1_global,-1.0,vec2_global)); 322 PetscCall(VecNorm(pcis->vec1_global,NORM_INFINITY,&error)); 323 PetscCall(PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error)); 324 PetscCall(VecDestroy(&vec2_global)); 325 326 /* restriction -> from parallel to local */ 327 PetscCall(VecSet(pcis->vec1_global,0.0)); 328 PetscCall(VecSetRandom(pcis->vec1_B,NULL)); 329 PetscCall(VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 330 PetscCall(VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 331 PetscCall(PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B)); 332 PetscCall(VecScale(pcis->vec1_B,-1.0)); 333 PetscCall(VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 334 PetscCall(VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 335 PetscCall(VecNorm(pcis->vec1_global,NORM_INFINITY,&error)); 336 PetscCall(PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error)); 337 PetscCall(MatDestroy(&B0_B)); 338 PetscCall(VecDestroy(&B0_Bv)); 339 PetscCall(VecDestroy(&B0_Bv2)); 340 } 341 PetscFunctionReturn(0); 342 } 343 344 PetscErrorCode PCBDDCScalingDestroy(PC pc) 345 { 346 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 347 348 PetscFunctionBegin; 349 if (pcbddc->deluxe_ctx) { 350 PetscCall(PCBDDCScalingDestroy_Deluxe(pc)); 351 } 352 PetscCall(VecDestroy(&pcbddc->work_scaling)); 353 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL)); 354 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL)); 355 PetscFunctionReturn(0); 356 } 357 358 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) 359 { 360 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 361 PCBDDCDeluxeScaling deluxe_ctx; 362 363 PetscFunctionBegin; 364 PetscCall(PetscNew(&deluxe_ctx)); 365 pcbddc->deluxe_ctx = deluxe_ctx; 366 PetscFunctionReturn(0); 367 } 368 369 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) 370 { 371 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 372 373 PetscFunctionBegin; 374 PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx)); 375 PetscCall(PetscFree(pcbddc->deluxe_ctx)); 376 PetscFunctionReturn(0); 377 } 378 379 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) 380 { 381 PetscInt i; 382 383 PetscFunctionBegin; 384 PetscCall(PetscFree(deluxe_ctx->idx_simple_B)); 385 deluxe_ctx->n_simple = 0; 386 for (i=0;i<deluxe_ctx->seq_n;i++) { 387 PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i])); 388 PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i])); 389 PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i])); 390 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 391 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 392 } 393 PetscCall(PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum)); 394 PetscCall(PetscFree(deluxe_ctx->workspace)); 395 deluxe_ctx->seq_n = 0; 396 PetscFunctionReturn(0); 397 } 398 399 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) 400 { 401 PC_IS *pcis=(PC_IS*)pc->data; 402 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 403 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 404 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 405 406 PetscFunctionBegin; 407 /* reset data structures if the topology has changed */ 408 if (pcbddc->recompute_topography) { 409 PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx)); 410 } 411 412 /* Compute data structures to solve sequential problems */ 413 PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc)); 414 415 /* diagonal scaling on interface dofs not contained in cc */ 416 if (sub_schurs->is_vertices || sub_schurs->is_dir) { 417 PetscInt n_com,n_dir; 418 n_com = 0; 419 if (sub_schurs->is_vertices) { 420 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_com)); 421 } 422 n_dir = 0; 423 if (sub_schurs->is_dir) { 424 PetscCall(ISGetLocalSize(sub_schurs->is_dir,&n_dir)); 425 } 426 if (!deluxe_ctx->n_simple) { 427 deluxe_ctx->n_simple = n_dir + n_com; 428 PetscCall(PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B)); 429 if (sub_schurs->is_vertices) { 430 PetscInt nmap; 431 const PetscInt *idxs; 432 433 PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs)); 434 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B)); 435 PetscCheckFalse(nmap != n_com,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %D != %D",nmap,n_com); 436 PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs)); 437 } 438 if (sub_schurs->is_dir) { 439 PetscInt nmap; 440 const PetscInt *idxs; 441 442 PetscCall(ISGetIndices(sub_schurs->is_dir,&idxs)); 443 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com)); 444 PetscCheckFalse(nmap != n_dir,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %D != %D",nmap,n_dir); 445 PetscCall(ISRestoreIndices(sub_schurs->is_dir,&idxs)); 446 } 447 PetscCall(PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B)); 448 } else { 449 PetscCheckFalse(deluxe_ctx->n_simple != n_dir + n_com,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %D is different from the previous one computed %D",n_dir + n_com,deluxe_ctx->n_simple); 450 } 451 } else { 452 deluxe_ctx->n_simple = 0; 453 deluxe_ctx->idx_simple_B = NULL; 454 } 455 PetscFunctionReturn(0); 456 } 457 458 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc) 459 { 460 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 461 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 462 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 463 PetscScalar *matdata,*matdata2; 464 PetscInt i,max_subset_size,cum,cum2; 465 const PetscInt *idxs; 466 PetscBool newsetup = PETSC_FALSE; 467 468 PetscFunctionBegin; 469 PetscCheck(sub_schurs,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Missing PCBDDCSubSchurs"); 470 if (!sub_schurs->n_subs) PetscFunctionReturn(0); 471 472 /* Allocate arrays for subproblems */ 473 if (!deluxe_ctx->seq_n) { 474 deluxe_ctx->seq_n = sub_schurs->n_subs; 475 PetscCall(PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum)); 476 newsetup = PETSC_TRUE; 477 } else PetscCheckFalse(deluxe_ctx->seq_n != sub_schurs->n_subs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %D is different from the sub_schurs %D",deluxe_ctx->seq_n,sub_schurs->n_subs); 478 479 /* the change of basis is just a reference to sub_schurs->change (if any) */ 480 deluxe_ctx->change = sub_schurs->change; 481 deluxe_ctx->change_with_qr = sub_schurs->change_with_qr; 482 483 /* Create objects for deluxe */ 484 max_subset_size = 0; 485 for (i=0;i<sub_schurs->n_subs;i++) { 486 PetscInt subset_size; 487 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 488 max_subset_size = PetscMax(subset_size,max_subset_size); 489 } 490 if (newsetup) { 491 PetscCall(PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace)); 492 } 493 cum = cum2 = 0; 494 PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs)); 495 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata)); 496 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2)); 497 for (i=0;i<deluxe_ctx->seq_n;i++) { 498 PetscInt subset_size; 499 500 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 501 if (newsetup) { 502 IS sub; 503 /* work vectors */ 504 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i])); 505 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i])); 506 507 /* scatters */ 508 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub)); 509 PetscCall(VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i])); 510 PetscCall(ISDestroy(&sub)); 511 } 512 513 /* S_E_j */ 514 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 515 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i])); 516 517 /* \sum_k S^k_E_j */ 518 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 519 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i])); 520 PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef)); 521 PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian)); 522 if (sub_schurs->is_hermitian) { 523 PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL)); 524 } else { 525 PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL)); 526 } 527 if (pcbddc->deluxe_singlemat) { 528 Mat X,Y; 529 if (!sub_schurs->is_hermitian) { 530 PetscCall(MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X)); 531 } else { 532 PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i])); 533 X = deluxe_ctx->seq_mat[i]; 534 } 535 PetscCall(MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y)); 536 if (!sub_schurs->is_hermitian) { 537 PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y)); 538 } else { 539 PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y)); 540 } 541 542 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 543 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 544 PetscCall(MatDestroy(&X)); 545 if (deluxe_ctx->change) { 546 Mat C,CY; 547 PetscCheck(deluxe_ctx->change_with_qr,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis"); 548 PetscCall(KSPGetOperators(deluxe_ctx->change[i],&C,NULL)); 549 PetscCall(MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY)); 550 PetscCall(MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y)); 551 PetscCall(MatDestroy(&CY)); 552 PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */ 553 } 554 PetscCall(MatTranspose(Y,MAT_INPLACE_MATRIX,&Y)); 555 deluxe_ctx->seq_mat[i] = Y; 556 } 557 cum += subset_size; 558 cum2 += subset_size*subset_size; 559 } 560 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs)); 561 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata)); 562 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2)); 563 if (pcbddc->deluxe_singlemat) { 564 deluxe_ctx->change = NULL; 565 deluxe_ctx->change_with_qr = PETSC_FALSE; 566 } 567 568 if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) { 569 for (i=0;i<deluxe_ctx->seq_n;i++) { 570 if (newsetup) { 571 PC pc; 572 573 PetscCall(KSPGetPC(deluxe_ctx->change[i],&pc)); 574 PetscCall(PCSetType(pc,PCLU)); 575 PetscCall(KSPSetFromOptions(deluxe_ctx->change[i])); 576 } 577 PetscCall(KSPSetUp(deluxe_ctx->change[i])); 578 } 579 } 580 PetscFunctionReturn(0); 581 } 582