1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 6 /* prototypes for deluxe functions */ 7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC); 8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC); 9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC); 10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC); 11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling); 12 13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X) 14 { 15 Mat_SeqDense *mat = (Mat_SeqDense*)A->data; 16 PetscErrorCode ierr; 17 PetscScalar *b,*x; 18 PetscInt n; 19 PetscBLASInt nrhs,info,m; 20 PetscBool flg; 21 22 PetscFunctionBegin; 23 ierr = PetscBLASIntCast(A->rmap->n,&m);CHKERRQ(ierr); 24 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 25 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 26 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 27 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 28 29 ierr = MatGetSize(B,NULL,&n);CHKERRQ(ierr); 30 ierr = PetscBLASIntCast(n,&nrhs);CHKERRQ(ierr); 31 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 32 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 33 34 ierr = PetscMemcpy(x,b,m*nrhs*sizeof(PetscScalar));CHKERRQ(ierr); 35 36 if (A->factortype == MAT_FACTOR_LU) { 37 #if defined(PETSC_MISSING_LAPACK_GETRS) 38 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable."); 39 #else 40 PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info)); 41 if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve"); 42 #endif 43 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 44 #if defined(PETSC_MISSING_LAPACK_POTRS) 45 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRS - Lapack routine is unavailable."); 46 #else 47 PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("L",&m,&nrhs,mat->v,&mat->lda,x,&m,&info)); 48 if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"POTRS Bad solve"); 49 #endif 50 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be factored to solve"); 51 52 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 53 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 54 ierr = PetscLogFlops(nrhs*(2.0*m*m - m));CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 59 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) 60 { 61 PC_IS* pcis = (PC_IS*)pc->data; 62 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 63 PetscErrorCode ierr; 64 65 PetscFunctionBegin; 66 /* Apply partition of unity */ 67 ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr); 68 ierr = VecSet(global_vector,0.0);CHKERRQ(ierr); 69 ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 70 ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) 75 { 76 PC_IS* pcis=(PC_IS*)pc->data; 77 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 78 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr); 83 ierr = VecSet(y,0.0);CHKERRQ(ierr); 84 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 85 PetscInt i; 86 const PetscScalar *array_x,*array_D; 87 PetscScalar *array; 88 ierr = VecGetArrayRead(x,&array_x);CHKERRQ(ierr); 89 ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 90 ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr); 91 for (i=0;i<deluxe_ctx->n_simple;i++) { 92 array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]]; 93 } 94 ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr); 95 ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 96 ierr = VecRestoreArrayRead(x,&array_x);CHKERRQ(ierr); 97 } 98 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 99 if (deluxe_ctx->seq_mat) { 100 PetscInt i; 101 for (i=0;i<deluxe_ctx->seq_n;i++) { 102 if (deluxe_ctx->change) { 103 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 104 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 105 if (deluxe_ctx->change_with_qr) { 106 Mat change; 107 108 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 109 ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 110 } else { 111 ierr = KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 112 } 113 } else { 114 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 115 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 116 } 117 ierr = MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 118 if (deluxe_ctx->seq_mat_inv_sum[i]) { 119 PetscScalar *x; 120 121 ierr = VecGetArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr); 122 ierr = VecPlaceArray(deluxe_ctx->seq_work1[i],x);CHKERRQ(ierr); 123 ierr = VecRestoreArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr); 124 ierr = MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 125 ierr = VecResetArray(deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 126 } 127 if (deluxe_ctx->change) { 128 Mat change; 129 130 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 131 ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 132 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 133 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 134 } else { 135 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 136 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 137 } 138 } 139 } 140 /* put local boundary part in global vector */ 141 ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 142 ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 146 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) 147 { 148 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 149 PetscErrorCode ierr; 150 151 PetscFunctionBegin; 152 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 153 PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2); 154 PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3); 155 if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n"); 156 ierr = PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr); 157 PetscFunctionReturn(0); 158 } 159 160 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) 161 { 162 PetscErrorCode ierr; 163 PC_IS *pcis = (PC_IS*)pc->data; 164 165 PetscFunctionBegin; 166 ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 167 ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 168 /* Apply partition of unity */ 169 ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr); 170 PetscFunctionReturn(0); 171 } 172 173 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) 174 { 175 PC_IS* pcis=(PC_IS*)pc->data; 176 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 177 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 /* get local boundary part of global vector */ 182 ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 183 ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 184 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 185 PetscInt i; 186 PetscScalar *array_y; 187 const PetscScalar *array_D; 188 ierr = VecGetArray(y,&array_y);CHKERRQ(ierr); 189 ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 190 for (i=0;i<deluxe_ctx->n_simple;i++) { 191 array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]]; 192 } 193 ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 194 ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr); 195 } 196 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 197 if (deluxe_ctx->seq_mat) { 198 PetscInt i; 199 for (i=0;i<deluxe_ctx->seq_n;i++) { 200 if (deluxe_ctx->change) { 201 Mat change; 202 203 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 204 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 205 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 206 ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 207 } else { 208 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 209 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 210 } 211 if (deluxe_ctx->seq_mat_inv_sum[i]) { 212 PetscScalar *x; 213 214 ierr = VecGetArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr); 215 ierr = VecPlaceArray(deluxe_ctx->seq_work2[i],x);CHKERRQ(ierr); 216 ierr = VecRestoreArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr); 217 ierr = MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 218 ierr = VecResetArray(deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 219 } 220 ierr = MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 221 if (deluxe_ctx->change) { 222 if (deluxe_ctx->change_with_qr) { 223 Mat change; 224 225 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 226 ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 227 } else { 228 ierr = KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 229 } 230 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 231 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 232 } else { 233 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 234 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 235 } 236 } 237 } 238 PetscFunctionReturn(0); 239 } 240 241 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) 242 { 243 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 244 PetscErrorCode ierr; 245 246 PetscFunctionBegin; 247 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 248 PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2); 249 PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3); 250 if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n"); 251 ierr = PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr); 252 PetscFunctionReturn(0); 253 } 254 255 PetscErrorCode PCBDDCScalingSetUp(PC pc) 256 { 257 PC_IS* pcis=(PC_IS*)pc->data; 258 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 259 PetscErrorCode ierr; 260 261 PetscFunctionBegin; 262 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 263 /* create work vector for the operator */ 264 ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr); 265 ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr); 266 /* always rebuild pcis->D */ 267 if (pcis->use_stiffness_scaling) { 268 ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr); 269 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 270 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 271 } 272 ierr = VecCopy(pcis->D,pcis->vec1_B);CHKERRQ(ierr); 273 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 274 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 275 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 276 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 277 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 278 ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 279 /* now setup */ 280 if (pcbddc->use_deluxe_scaling) { 281 if (!pcbddc->deluxe_ctx) { 282 ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr); 283 } 284 ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr); 285 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr); 286 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr); 287 } else { 288 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr); 289 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr); 290 } 291 292 /* test */ 293 if (pcbddc->dbg_flag) { 294 Mat B0_B = NULL; 295 Vec B0_Bv = NULL, B0_Bv2 = NULL; 296 Vec vec2_global; 297 PetscViewer viewer = pcbddc->dbg_viewer; 298 PetscReal error; 299 300 /* extension -> from local to parallel */ 301 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 302 ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr); 303 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 304 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 305 ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr); 306 ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr); 307 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 308 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 309 if (pcbddc->benign_n) { 310 IS is_dummy; 311 312 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 313 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 314 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 315 ierr = MatCreateVecs(B0_B,NULL,&B0_Bv);CHKERRQ(ierr); 316 ierr = VecDuplicate(B0_Bv,&B0_Bv2);CHKERRQ(ierr); 317 ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv);CHKERRQ(ierr); 318 } 319 ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr); 320 if (pcbddc->benign_saddle_point) { 321 PetscReal errorl = 0.; 322 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 323 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 324 if (pcbddc->benign_n) { 325 ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv2);CHKERRQ(ierr); 326 ierr = VecAXPY(B0_Bv,-1.0,B0_Bv2);CHKERRQ(ierr); 327 ierr = VecNorm(B0_Bv,NORM_INFINITY,&errorl);CHKERRQ(ierr); 328 } 329 ierr = MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 330 ierr = PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);CHKERRQ(ierr); 331 } 332 ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr); 333 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr); 334 ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr); 335 ierr = VecDestroy(&vec2_global);CHKERRQ(ierr); 336 337 /* restriction -> from parallel to local */ 338 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 339 ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr); 340 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 341 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 342 ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr); 343 ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr); 344 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 345 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 346 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr); 347 ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr); 348 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 349 ierr = VecDestroy(&B0_Bv);CHKERRQ(ierr); 350 ierr = VecDestroy(&B0_Bv2);CHKERRQ(ierr); 351 } 352 PetscFunctionReturn(0); 353 } 354 355 PetscErrorCode PCBDDCScalingDestroy(PC pc) 356 { 357 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 358 PetscErrorCode ierr; 359 360 PetscFunctionBegin; 361 if (pcbddc->deluxe_ctx) { 362 ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr); 363 } 364 ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr); 365 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr); 366 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) 371 { 372 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 373 PCBDDCDeluxeScaling deluxe_ctx; 374 PetscErrorCode ierr; 375 376 PetscFunctionBegin; 377 ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr); 378 pcbddc->deluxe_ctx = deluxe_ctx; 379 PetscFunctionReturn(0); 380 } 381 382 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) 383 { 384 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 385 PetscErrorCode ierr; 386 387 PetscFunctionBegin; 388 ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr); 389 ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) 394 { 395 PetscInt i; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 400 deluxe_ctx->n_simple = 0; 401 for (i=0;i<deluxe_ctx->seq_n;i++) { 402 ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr); 403 ierr = VecDestroy(&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 404 ierr = VecDestroy(&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 405 ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 406 ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 407 } 408 ierr = PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr); 409 ierr = PetscFree(deluxe_ctx->workspace);CHKERRQ(ierr); 410 deluxe_ctx->seq_n = 0; 411 PetscFunctionReturn(0); 412 } 413 414 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) 415 { 416 PC_IS *pcis=(PC_IS*)pc->data; 417 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 418 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 419 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 420 PetscErrorCode ierr; 421 422 PetscFunctionBegin; 423 /* reset data structures if the topology has changed */ 424 if (pcbddc->recompute_topography) { 425 ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr); 426 } 427 428 /* Compute data structures to solve sequential problems */ 429 ierr = PCBDDCScalingSetUp_Deluxe_Private(pc);CHKERRQ(ierr); 430 431 /* diagonal scaling on interface dofs not contained in cc */ 432 if (sub_schurs->is_vertices || sub_schurs->is_dir) { 433 PetscInt n_com,n_dir; 434 n_com = 0; 435 if (sub_schurs->is_vertices) { 436 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_com);CHKERRQ(ierr); 437 } 438 n_dir = 0; 439 if (sub_schurs->is_dir) { 440 ierr = ISGetLocalSize(sub_schurs->is_dir,&n_dir);CHKERRQ(ierr); 441 } 442 if (!deluxe_ctx->n_simple) { 443 deluxe_ctx->n_simple = n_dir + n_com; 444 ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 445 if (sub_schurs->is_vertices) { 446 PetscInt nmap; 447 const PetscInt *idxs; 448 449 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 450 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 451 if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %d != %d",nmap,n_com); 452 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 453 } 454 if (sub_schurs->is_dir) { 455 PetscInt nmap; 456 const PetscInt *idxs; 457 458 ierr = ISGetIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr); 459 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);CHKERRQ(ierr); 460 if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %d != %d",nmap,n_dir); 461 ierr = ISRestoreIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr); 462 } 463 ierr = PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 464 } else { 465 if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %d is different from the previous one computed %d",n_dir + n_com,deluxe_ctx->n_simple); 466 } 467 } else { 468 deluxe_ctx->n_simple = 0; 469 deluxe_ctx->idx_simple_B = 0; 470 } 471 PetscFunctionReturn(0); 472 } 473 474 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc) 475 { 476 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 477 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 478 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 479 PetscScalar *matdata,*matdata2; 480 PetscInt i,max_subset_size,cum,cum2; 481 const PetscInt *idxs; 482 PetscBool newsetup = PETSC_FALSE; 483 PetscErrorCode ierr; 484 485 PetscFunctionBegin; 486 if (!sub_schurs->n_subs) { 487 PetscFunctionReturn(0); 488 } 489 490 /* Allocate arrays for subproblems */ 491 if (!deluxe_ctx->seq_n) { 492 deluxe_ctx->seq_n = sub_schurs->n_subs; 493 ierr = PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr); 494 newsetup = PETSC_TRUE; 495 } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) { 496 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %d is different from the sub_schurs %d",deluxe_ctx->seq_n,sub_schurs->n_subs); 497 } 498 deluxe_ctx->change_with_qr = sub_schurs->change_with_qr; 499 500 /* Create objects for deluxe */ 501 max_subset_size = 0; 502 for (i=0;i<sub_schurs->n_subs;i++) { 503 PetscInt subset_size; 504 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 505 max_subset_size = PetscMax(subset_size,max_subset_size); 506 } 507 if (newsetup) { 508 ierr = PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);CHKERRQ(ierr); 509 } 510 cum = cum2 = 0; 511 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr); 512 ierr = MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr); 513 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr); 514 for (i=0;i<deluxe_ctx->seq_n;i++) { 515 PetscInt subset_size; 516 517 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 518 if (newsetup) { 519 IS sub; 520 /* work vectors */ 521 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 522 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 523 524 /* scatters */ 525 ierr = ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);CHKERRQ(ierr); 526 ierr = VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr); 527 ierr = ISDestroy(&sub);CHKERRQ(ierr); 528 } 529 530 /* S_E_j */ 531 ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 532 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 533 534 /* \sum_k S^k_E_j */ 535 ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 536 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 537 538 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 539 ierr = MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);CHKERRQ(ierr); 540 } else { 541 ierr = MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);CHKERRQ(ierr); 542 } 543 if (pcbddc->deluxe_singlemat) { 544 Mat X,Y; 545 546 if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) { 547 ierr = MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);CHKERRQ(ierr); 548 } else { 549 ierr = PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 550 X = deluxe_ctx->seq_mat[i]; 551 } 552 ierr = MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);CHKERRQ(ierr); 553 if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) { 554 ierr = PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr); 555 } else { 556 ierr = MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr); 557 } 558 559 ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 560 ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 561 ierr = MatDestroy(&X);CHKERRQ(ierr); 562 ierr = MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);CHKERRQ(ierr); 563 deluxe_ctx->seq_mat[i] = Y; 564 } 565 cum += subset_size; 566 cum2 += subset_size*subset_size; 567 } 568 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr); 569 ierr = MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr); 570 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr); 571 572 /* the change of basis is just a reference to sub_schurs->change (if any) */ 573 deluxe_ctx->change = sub_schurs->change; 574 if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) { 575 for (i=0;i<deluxe_ctx->seq_n;i++) { 576 if (newsetup) { 577 PC pc; 578 579 ierr = KSPGetPC(deluxe_ctx->change[i],&pc);CHKERRQ(ierr); 580 ierr = PCSetType(pc,PCLU);CHKERRQ(ierr); 581 ierr = KSPSetFromOptions(deluxe_ctx->change[i]);CHKERRQ(ierr); 582 } 583 ierr = KSPSetUp(deluxe_ctx->change[i]);CHKERRQ(ierr); 584 } 585 } 586 PetscFunctionReturn(0); 587 } 588