1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 6 /* prototypes for deluxe functions */ 7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC); 8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC); 9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC); 10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC); 11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling); 12 13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X) 14 { 15 Mat_SeqDense *mat = (Mat_SeqDense*)A->data; 16 PetscErrorCode ierr; 17 PetscScalar *b,*x; 18 PetscInt n; 19 PetscBLASInt nrhs,info,m; 20 PetscBool flg; 21 22 PetscFunctionBegin; 23 ierr = PetscBLASIntCast(A->rmap->n,&m);CHKERRQ(ierr); 24 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 25 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 26 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 27 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 28 29 ierr = MatGetSize(B,NULL,&n);CHKERRQ(ierr); 30 ierr = PetscBLASIntCast(n,&nrhs);CHKERRQ(ierr); 31 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 32 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 33 34 ierr = PetscMemcpy(x,b,m*nrhs*sizeof(PetscScalar));CHKERRQ(ierr); 35 36 if (A->factortype == MAT_FACTOR_LU) { 37 #if defined(PETSC_MISSING_LAPACK_GETRS) 38 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable."); 39 #else 40 PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info)); 41 if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve"); 42 #endif 43 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported"); 44 45 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 46 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 47 ierr = PetscLogFlops(nrhs*(2.0*m*m - m));CHKERRQ(ierr); 48 PetscFunctionReturn(0); 49 } 50 51 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) 52 { 53 PC_IS* pcis = (PC_IS*)pc->data; 54 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 55 PetscErrorCode ierr; 56 57 PetscFunctionBegin; 58 /* Apply partition of unity */ 59 ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr); 60 ierr = VecSet(global_vector,0.0);CHKERRQ(ierr); 61 ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 62 ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) 67 { 68 PC_IS* pcis=(PC_IS*)pc->data; 69 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 70 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr); 75 ierr = VecSet(y,0.0);CHKERRQ(ierr); 76 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 77 PetscInt i; 78 const PetscScalar *array_x,*array_D; 79 PetscScalar *array; 80 ierr = VecGetArrayRead(x,&array_x);CHKERRQ(ierr); 81 ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 82 ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr); 83 for (i=0;i<deluxe_ctx->n_simple;i++) { 84 array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]]; 85 } 86 ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr); 87 ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 88 ierr = VecRestoreArrayRead(x,&array_x);CHKERRQ(ierr); 89 } 90 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 91 if (deluxe_ctx->seq_mat) { 92 PetscInt i; 93 for (i=0;i<deluxe_ctx->seq_n;i++) { 94 if (deluxe_ctx->change) { 95 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 96 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 97 if (deluxe_ctx->change_with_qr) { 98 Mat change; 99 100 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 101 ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 102 } else { 103 ierr = KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 104 } 105 } else { 106 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 107 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 108 } 109 ierr = MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 110 if (deluxe_ctx->seq_mat_inv_sum[i]) { 111 PetscScalar *x; 112 113 ierr = VecGetArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr); 114 ierr = VecPlaceArray(deluxe_ctx->seq_work1[i],x);CHKERRQ(ierr); 115 ierr = VecRestoreArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr); 116 ierr = MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 117 ierr = VecResetArray(deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 118 } 119 if (deluxe_ctx->change) { 120 Mat change; 121 122 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 123 ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 124 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 125 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 126 } else { 127 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 128 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 129 } 130 } 131 } 132 /* put local boundary part in global vector */ 133 ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 134 ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) 139 { 140 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 141 PetscErrorCode ierr; 142 143 PetscFunctionBegin; 144 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 145 PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2); 146 PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3); 147 if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!"); 148 ierr = PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) 153 { 154 PetscErrorCode ierr; 155 PC_IS *pcis = (PC_IS*)pc->data; 156 157 PetscFunctionBegin; 158 ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 159 ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 160 /* Apply partition of unity */ 161 ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) 166 { 167 PC_IS* pcis=(PC_IS*)pc->data; 168 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 169 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 170 PetscErrorCode ierr; 171 172 PetscFunctionBegin; 173 /* get local boundary part of global vector */ 174 ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 175 ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 176 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 177 PetscInt i; 178 PetscScalar *array_y; 179 const PetscScalar *array_D; 180 ierr = VecGetArray(y,&array_y);CHKERRQ(ierr); 181 ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 182 for (i=0;i<deluxe_ctx->n_simple;i++) { 183 array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]]; 184 } 185 ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 186 ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr); 187 } 188 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 189 if (deluxe_ctx->seq_mat) { 190 PetscInt i; 191 for (i=0;i<deluxe_ctx->seq_n;i++) { 192 if (deluxe_ctx->change) { 193 Mat change; 194 195 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 196 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 197 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 198 ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 199 } else { 200 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 201 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 202 } 203 if (deluxe_ctx->seq_mat_inv_sum[i]) { 204 PetscScalar *x; 205 206 ierr = VecGetArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr); 207 ierr = VecPlaceArray(deluxe_ctx->seq_work2[i],x);CHKERRQ(ierr); 208 ierr = VecRestoreArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr); 209 ierr = MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 210 ierr = VecResetArray(deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 211 } 212 ierr = MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 213 if (deluxe_ctx->change) { 214 if (deluxe_ctx->change_with_qr) { 215 Mat change; 216 217 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 218 ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 219 } else { 220 ierr = KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 221 } 222 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 223 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 224 } else { 225 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 226 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 227 } 228 } 229 } 230 PetscFunctionReturn(0); 231 } 232 233 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) 234 { 235 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 236 PetscErrorCode ierr; 237 238 PetscFunctionBegin; 239 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 240 PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2); 241 PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3); 242 if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!"); 243 ierr = PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 PetscErrorCode PCBDDCScalingSetUp(PC pc) 248 { 249 PC_IS* pcis=(PC_IS*)pc->data; 250 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 251 PetscErrorCode ierr; 252 253 PetscFunctionBegin; 254 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 255 ierr = PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 256 /* create work vector for the operator */ 257 ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr); 258 ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr); 259 /* always rebuild pcis->D */ 260 if (pcis->use_stiffness_scaling) { 261 PetscScalar *a; 262 PetscInt i,n; 263 264 ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr); 265 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 266 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 267 ierr = VecAbs(pcis->D);CHKERRQ(ierr); 268 ierr = VecGetLocalSize(pcis->D,&n);CHKERRQ(ierr); 269 ierr = VecGetArray(pcis->D,&a);CHKERRQ(ierr); 270 for (i=0;i<n;i++) if (PetscAbsScalar(a[i])<PETSC_SMALL) a[i] = 1.0; 271 ierr = VecRestoreArray(pcis->D,&a);CHKERRQ(ierr); 272 } 273 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 274 ierr = VecScatterBegin(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 275 ierr = VecScatterEnd(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 276 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 277 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 278 ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 279 /* now setup */ 280 if (pcbddc->use_deluxe_scaling) { 281 if (!pcbddc->deluxe_ctx) { 282 ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr); 283 } 284 ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr); 285 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr); 286 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr); 287 } else { 288 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr); 289 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr); 290 } 291 292 /* test */ 293 if (pcbddc->dbg_flag) { 294 Mat B0_B = NULL; 295 Vec B0_Bv = NULL, B0_Bv2 = NULL; 296 Vec vec2_global; 297 PetscViewer viewer = pcbddc->dbg_viewer; 298 PetscReal error; 299 300 /* extension -> from local to parallel */ 301 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 302 ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr); 303 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 304 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 305 ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr); 306 ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr); 307 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 308 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 309 if (pcbddc->benign_n) { 310 IS is_dummy; 311 312 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 313 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 314 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 315 ierr = MatCreateVecs(B0_B,NULL,&B0_Bv);CHKERRQ(ierr); 316 ierr = VecDuplicate(B0_Bv,&B0_Bv2);CHKERRQ(ierr); 317 ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv);CHKERRQ(ierr); 318 } 319 ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr); 320 if (pcbddc->benign_saddle_point) { 321 PetscReal errorl = 0.; 322 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 323 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 324 if (pcbddc->benign_n) { 325 ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv2);CHKERRQ(ierr); 326 ierr = VecAXPY(B0_Bv,-1.0,B0_Bv2);CHKERRQ(ierr); 327 ierr = VecNorm(B0_Bv,NORM_INFINITY,&errorl);CHKERRQ(ierr); 328 } 329 ierr = MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 330 ierr = PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);CHKERRQ(ierr); 331 } 332 ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr); 333 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr); 334 ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr); 335 ierr = VecDestroy(&vec2_global);CHKERRQ(ierr); 336 337 /* restriction -> from parallel to local */ 338 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 339 ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr); 340 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 341 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 342 ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr); 343 ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr); 344 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 345 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 346 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr); 347 ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr); 348 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 349 ierr = VecDestroy(&B0_Bv);CHKERRQ(ierr); 350 ierr = VecDestroy(&B0_Bv2);CHKERRQ(ierr); 351 } 352 ierr = PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 353 PetscFunctionReturn(0); 354 } 355 356 PetscErrorCode PCBDDCScalingDestroy(PC pc) 357 { 358 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 359 PetscErrorCode ierr; 360 361 PetscFunctionBegin; 362 if (pcbddc->deluxe_ctx) { 363 ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr); 364 } 365 ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr); 366 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr); 367 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr); 368 PetscFunctionReturn(0); 369 } 370 371 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) 372 { 373 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 374 PCBDDCDeluxeScaling deluxe_ctx; 375 PetscErrorCode ierr; 376 377 PetscFunctionBegin; 378 ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr); 379 pcbddc->deluxe_ctx = deluxe_ctx; 380 PetscFunctionReturn(0); 381 } 382 383 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) 384 { 385 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 386 PetscErrorCode ierr; 387 388 PetscFunctionBegin; 389 ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr); 390 ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr); 391 PetscFunctionReturn(0); 392 } 393 394 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) 395 { 396 PetscInt i; 397 PetscErrorCode ierr; 398 399 PetscFunctionBegin; 400 ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 401 deluxe_ctx->n_simple = 0; 402 for (i=0;i<deluxe_ctx->seq_n;i++) { 403 ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr); 404 ierr = VecDestroy(&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 405 ierr = VecDestroy(&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 406 ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 407 ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 408 } 409 ierr = PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr); 410 ierr = PetscFree(deluxe_ctx->workspace);CHKERRQ(ierr); 411 deluxe_ctx->seq_n = 0; 412 PetscFunctionReturn(0); 413 } 414 415 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) 416 { 417 PC_IS *pcis=(PC_IS*)pc->data; 418 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 419 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 420 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 421 PetscErrorCode ierr; 422 423 PetscFunctionBegin; 424 /* reset data structures if the topology has changed */ 425 if (pcbddc->recompute_topography) { 426 ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr); 427 } 428 429 /* Compute data structures to solve sequential problems */ 430 ierr = PCBDDCScalingSetUp_Deluxe_Private(pc);CHKERRQ(ierr); 431 432 /* diagonal scaling on interface dofs not contained in cc */ 433 if (sub_schurs->is_vertices || sub_schurs->is_dir) { 434 PetscInt n_com,n_dir; 435 n_com = 0; 436 if (sub_schurs->is_vertices) { 437 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_com);CHKERRQ(ierr); 438 } 439 n_dir = 0; 440 if (sub_schurs->is_dir) { 441 ierr = ISGetLocalSize(sub_schurs->is_dir,&n_dir);CHKERRQ(ierr); 442 } 443 if (!deluxe_ctx->n_simple) { 444 deluxe_ctx->n_simple = n_dir + n_com; 445 ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 446 if (sub_schurs->is_vertices) { 447 PetscInt nmap; 448 const PetscInt *idxs; 449 450 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 451 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 452 if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %D != %D",nmap,n_com); 453 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 454 } 455 if (sub_schurs->is_dir) { 456 PetscInt nmap; 457 const PetscInt *idxs; 458 459 ierr = ISGetIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr); 460 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);CHKERRQ(ierr); 461 if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %D != %D",nmap,n_dir); 462 ierr = ISRestoreIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr); 463 } 464 ierr = PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 465 } else { 466 if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %D is different from the previous one computed %D",n_dir + n_com,deluxe_ctx->n_simple); 467 } 468 } else { 469 deluxe_ctx->n_simple = 0; 470 deluxe_ctx->idx_simple_B = 0; 471 } 472 PetscFunctionReturn(0); 473 } 474 475 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc) 476 { 477 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 478 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 479 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 480 PetscScalar *matdata,*matdata2; 481 PetscInt i,max_subset_size,cum,cum2; 482 const PetscInt *idxs; 483 PetscBool newsetup = PETSC_FALSE; 484 PetscErrorCode ierr; 485 486 PetscFunctionBegin; 487 if (!sub_schurs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Missing PCBDDCSubSchurs"); 488 if (!sub_schurs->n_subs) PetscFunctionReturn(0); 489 490 /* Allocate arrays for subproblems */ 491 if (!deluxe_ctx->seq_n) { 492 deluxe_ctx->seq_n = sub_schurs->n_subs; 493 ierr = PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr); 494 newsetup = PETSC_TRUE; 495 } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %D is different from the sub_schurs %D",deluxe_ctx->seq_n,sub_schurs->n_subs); 496 497 /* the change of basis is just a reference to sub_schurs->change (if any) */ 498 deluxe_ctx->change = sub_schurs->change; 499 deluxe_ctx->change_with_qr = sub_schurs->change_with_qr; 500 501 /* Create objects for deluxe */ 502 max_subset_size = 0; 503 for (i=0;i<sub_schurs->n_subs;i++) { 504 PetscInt subset_size; 505 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 506 max_subset_size = PetscMax(subset_size,max_subset_size); 507 } 508 if (newsetup) { 509 ierr = PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);CHKERRQ(ierr); 510 } 511 cum = cum2 = 0; 512 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr); 513 ierr = MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr); 514 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr); 515 for (i=0;i<deluxe_ctx->seq_n;i++) { 516 PetscInt subset_size; 517 518 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 519 if (newsetup) { 520 IS sub; 521 /* work vectors */ 522 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 523 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 524 525 /* scatters */ 526 ierr = ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);CHKERRQ(ierr); 527 ierr = VecScatterCreateWithData(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr); 528 ierr = ISDestroy(&sub);CHKERRQ(ierr); 529 } 530 531 /* S_E_j */ 532 ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 533 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 534 535 /* \sum_k S^k_E_j */ 536 ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 538 ierr = MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef);CHKERRQ(ierr); 539 ierr = MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian);CHKERRQ(ierr); 540 if (sub_schurs->is_hermitian) { 541 ierr = MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);CHKERRQ(ierr); 542 } else { 543 ierr = MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);CHKERRQ(ierr); 544 } 545 if (pcbddc->deluxe_singlemat) { 546 Mat X,Y; 547 if (!sub_schurs->is_hermitian) { 548 ierr = MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);CHKERRQ(ierr); 549 } else { 550 ierr = PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 551 X = deluxe_ctx->seq_mat[i]; 552 } 553 ierr = MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);CHKERRQ(ierr); 554 if (!sub_schurs->is_hermitian) { 555 ierr = PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr); 556 } else { 557 ierr = MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr); 558 } 559 560 ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 561 ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 562 ierr = MatDestroy(&X);CHKERRQ(ierr); 563 if (deluxe_ctx->change) { 564 Mat C,CY; 565 566 if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis"); 567 ierr = KSPGetOperators(deluxe_ctx->change[i],&C,NULL);CHKERRQ(ierr); 568 ierr = MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);CHKERRQ(ierr); 569 ierr = MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr); 570 ierr = MatDestroy(&CY);CHKERRQ(ierr); 571 } 572 ierr = MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);CHKERRQ(ierr); 573 deluxe_ctx->seq_mat[i] = Y; 574 } 575 cum += subset_size; 576 cum2 += subset_size*subset_size; 577 } 578 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr); 579 ierr = MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr); 580 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr); 581 if (pcbddc->deluxe_singlemat) { 582 deluxe_ctx->change = NULL; 583 deluxe_ctx->change_with_qr = PETSC_FALSE; 584 } 585 586 if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) { 587 for (i=0;i<deluxe_ctx->seq_n;i++) { 588 if (newsetup) { 589 PC pc; 590 591 ierr = KSPGetPC(deluxe_ctx->change[i],&pc);CHKERRQ(ierr); 592 ierr = PCSetType(pc,PCLU);CHKERRQ(ierr); 593 ierr = KSPSetFromOptions(deluxe_ctx->change[i]);CHKERRQ(ierr); 594 } 595 ierr = KSPSetUp(deluxe_ctx->change[i]);CHKERRQ(ierr); 596 } 597 } 598 PetscFunctionReturn(0); 599 } 600