1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 6 /* prototypes for deluxe functions */ 7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC); 8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC); 9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC); 10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC); 11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling); 12 13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X) 14 { 15 Mat_SeqDense *mat = (Mat_SeqDense*)A->data; 16 PetscErrorCode ierr; 17 const PetscScalar *b; 18 PetscScalar *x; 19 PetscInt n; 20 PetscBLASInt nrhs,info,m; 21 PetscBool flg; 22 23 PetscFunctionBegin; 24 ierr = PetscBLASIntCast(A->rmap->n,&m);CHKERRQ(ierr); 25 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 26 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 27 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 28 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 29 30 ierr = MatGetSize(B,NULL,&n);CHKERRQ(ierr); 31 ierr = PetscBLASIntCast(n,&nrhs);CHKERRQ(ierr); 32 ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); 33 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 34 ierr = PetscArraycpy(x,b,m*nrhs);CHKERRQ(ierr); 35 ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); 36 37 if (A->factortype == MAT_FACTOR_LU) { 38 PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info)); 39 if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve"); 40 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported"); 41 42 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 43 ierr = PetscLogFlops(nrhs*(2.0*m*m - m));CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) 48 { 49 PC_IS* pcis = (PC_IS*)pc->data; 50 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 /* Apply partition of unity */ 55 ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr); 56 ierr = VecSet(global_vector,0.0);CHKERRQ(ierr); 57 ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 58 ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 59 PetscFunctionReturn(0); 60 } 61 62 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) 63 { 64 PC_IS* pcis=(PC_IS*)pc->data; 65 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 66 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 67 PetscErrorCode ierr; 68 69 PetscFunctionBegin; 70 ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr); 71 ierr = VecSet(y,0.0);CHKERRQ(ierr); 72 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 73 PetscInt i; 74 const PetscScalar *array_x,*array_D; 75 PetscScalar *array; 76 ierr = VecGetArrayRead(x,&array_x);CHKERRQ(ierr); 77 ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 78 ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr); 79 for (i=0;i<deluxe_ctx->n_simple;i++) { 80 array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]]; 81 } 82 ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr); 83 ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 84 ierr = VecRestoreArrayRead(x,&array_x);CHKERRQ(ierr); 85 } 86 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 87 if (deluxe_ctx->seq_mat) { 88 PetscInt i; 89 for (i=0;i<deluxe_ctx->seq_n;i++) { 90 if (deluxe_ctx->change) { 91 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 92 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 93 if (deluxe_ctx->change_with_qr) { 94 Mat change; 95 96 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 97 ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 98 } else { 99 ierr = KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 100 } 101 } else { 102 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 103 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 104 } 105 ierr = MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 106 if (deluxe_ctx->seq_mat_inv_sum[i]) { 107 PetscScalar *x; 108 109 ierr = VecGetArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr); 110 ierr = VecPlaceArray(deluxe_ctx->seq_work1[i],x);CHKERRQ(ierr); 111 ierr = VecRestoreArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr); 112 ierr = MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 113 ierr = VecResetArray(deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 114 } 115 if (deluxe_ctx->change) { 116 Mat change; 117 118 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 119 ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 120 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 121 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 122 } else { 123 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 124 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 125 } 126 } 127 } 128 /* put local boundary part in global vector */ 129 ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 130 ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) 135 { 136 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 137 PetscErrorCode ierr; 138 139 PetscFunctionBegin; 140 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 141 PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2); 142 PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3); 143 if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!"); 144 ierr = PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) 149 { 150 PetscErrorCode ierr; 151 PC_IS *pcis = (PC_IS*)pc->data; 152 153 PetscFunctionBegin; 154 ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 155 ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 156 /* Apply partition of unity */ 157 ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr); 158 PetscFunctionReturn(0); 159 } 160 161 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) 162 { 163 PC_IS* pcis=(PC_IS*)pc->data; 164 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 165 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 166 PetscErrorCode ierr; 167 168 PetscFunctionBegin; 169 /* get local boundary part of global vector */ 170 ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 171 ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 172 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 173 PetscInt i; 174 PetscScalar *array_y; 175 const PetscScalar *array_D; 176 ierr = VecGetArray(y,&array_y);CHKERRQ(ierr); 177 ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 178 for (i=0;i<deluxe_ctx->n_simple;i++) { 179 array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]]; 180 } 181 ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 182 ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr); 183 } 184 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 185 if (deluxe_ctx->seq_mat) { 186 PetscInt i; 187 for (i=0;i<deluxe_ctx->seq_n;i++) { 188 if (deluxe_ctx->change) { 189 Mat change; 190 191 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 192 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 193 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 194 ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 195 } else { 196 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 197 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 198 } 199 if (deluxe_ctx->seq_mat_inv_sum[i]) { 200 PetscScalar *x; 201 202 ierr = VecGetArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr); 203 ierr = VecPlaceArray(deluxe_ctx->seq_work2[i],x);CHKERRQ(ierr); 204 ierr = VecRestoreArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr); 205 ierr = MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 206 ierr = VecResetArray(deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 207 } 208 ierr = MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 209 if (deluxe_ctx->change) { 210 if (deluxe_ctx->change_with_qr) { 211 Mat change; 212 213 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 214 ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 215 } else { 216 ierr = KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 217 } 218 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 219 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 220 } else { 221 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 222 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 223 } 224 } 225 } 226 PetscFunctionReturn(0); 227 } 228 229 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) 230 { 231 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 232 PetscErrorCode ierr; 233 234 PetscFunctionBegin; 235 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 236 PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2); 237 PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3); 238 if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!"); 239 ierr = PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr); 240 PetscFunctionReturn(0); 241 } 242 243 PetscErrorCode PCBDDCScalingSetUp(PC pc) 244 { 245 PC_IS* pcis=(PC_IS*)pc->data; 246 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 247 PetscErrorCode ierr; 248 249 PetscFunctionBegin; 250 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 251 ierr = PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 252 /* create work vector for the operator */ 253 ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr); 254 ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr); 255 /* always rebuild pcis->D */ 256 if (pcis->use_stiffness_scaling) { 257 PetscScalar *a; 258 PetscInt i,n; 259 260 ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr); 261 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 262 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 263 ierr = VecAbs(pcis->D);CHKERRQ(ierr); 264 ierr = VecGetLocalSize(pcis->D,&n);CHKERRQ(ierr); 265 ierr = VecGetArray(pcis->D,&a);CHKERRQ(ierr); 266 for (i=0;i<n;i++) if (PetscAbsScalar(a[i])<PETSC_SMALL) a[i] = 1.0; 267 ierr = VecRestoreArray(pcis->D,&a);CHKERRQ(ierr); 268 } 269 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 270 ierr = VecScatterBegin(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 271 ierr = VecScatterEnd(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 272 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 273 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 274 ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 275 /* now setup */ 276 if (pcbddc->use_deluxe_scaling) { 277 if (!pcbddc->deluxe_ctx) { 278 ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr); 279 } 280 ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr); 281 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr); 282 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr); 283 } else { 284 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr); 285 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr); 286 } 287 ierr = PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 288 289 /* test */ 290 if (pcbddc->dbg_flag) { 291 Mat B0_B = NULL; 292 Vec B0_Bv = NULL, B0_Bv2 = NULL; 293 Vec vec2_global; 294 PetscViewer viewer = pcbddc->dbg_viewer; 295 PetscReal error; 296 297 /* extension -> from local to parallel */ 298 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 299 ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr); 300 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 301 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 302 ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr); 303 ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr); 304 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 305 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 306 if (pcbddc->benign_n) { 307 IS is_dummy; 308 309 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 310 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 311 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 312 ierr = MatCreateVecs(B0_B,NULL,&B0_Bv);CHKERRQ(ierr); 313 ierr = VecDuplicate(B0_Bv,&B0_Bv2);CHKERRQ(ierr); 314 ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv);CHKERRQ(ierr); 315 } 316 ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr); 317 if (pcbddc->benign_saddle_point) { 318 PetscReal errorl = 0.; 319 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 320 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 321 if (pcbddc->benign_n) { 322 ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv2);CHKERRQ(ierr); 323 ierr = VecAXPY(B0_Bv,-1.0,B0_Bv2);CHKERRQ(ierr); 324 ierr = VecNorm(B0_Bv,NORM_INFINITY,&errorl);CHKERRQ(ierr); 325 } 326 ierr = MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 327 ierr = PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);CHKERRQ(ierr); 328 } 329 ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr); 330 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr); 331 ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr); 332 ierr = VecDestroy(&vec2_global);CHKERRQ(ierr); 333 334 /* restriction -> from parallel to local */ 335 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 336 ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr); 337 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 338 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 339 ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr); 340 ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr); 341 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 342 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 343 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr); 344 ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr); 345 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 346 ierr = VecDestroy(&B0_Bv);CHKERRQ(ierr); 347 ierr = VecDestroy(&B0_Bv2);CHKERRQ(ierr); 348 } 349 PetscFunctionReturn(0); 350 } 351 352 PetscErrorCode PCBDDCScalingDestroy(PC pc) 353 { 354 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 355 PetscErrorCode ierr; 356 357 PetscFunctionBegin; 358 if (pcbddc->deluxe_ctx) { 359 ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr); 360 } 361 ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr); 362 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr); 363 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr); 364 PetscFunctionReturn(0); 365 } 366 367 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) 368 { 369 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 370 PCBDDCDeluxeScaling deluxe_ctx; 371 PetscErrorCode ierr; 372 373 PetscFunctionBegin; 374 ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr); 375 pcbddc->deluxe_ctx = deluxe_ctx; 376 PetscFunctionReturn(0); 377 } 378 379 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) 380 { 381 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 382 PetscErrorCode ierr; 383 384 PetscFunctionBegin; 385 ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr); 386 ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr); 387 PetscFunctionReturn(0); 388 } 389 390 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) 391 { 392 PetscInt i; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 397 deluxe_ctx->n_simple = 0; 398 for (i=0;i<deluxe_ctx->seq_n;i++) { 399 ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr); 400 ierr = VecDestroy(&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 401 ierr = VecDestroy(&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 402 ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 403 ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 404 } 405 ierr = PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr); 406 ierr = PetscFree(deluxe_ctx->workspace);CHKERRQ(ierr); 407 deluxe_ctx->seq_n = 0; 408 PetscFunctionReturn(0); 409 } 410 411 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) 412 { 413 PC_IS *pcis=(PC_IS*)pc->data; 414 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 415 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 416 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 417 PetscErrorCode ierr; 418 419 PetscFunctionBegin; 420 /* reset data structures if the topology has changed */ 421 if (pcbddc->recompute_topography) { 422 ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr); 423 } 424 425 /* Compute data structures to solve sequential problems */ 426 ierr = PCBDDCScalingSetUp_Deluxe_Private(pc);CHKERRQ(ierr); 427 428 /* diagonal scaling on interface dofs not contained in cc */ 429 if (sub_schurs->is_vertices || sub_schurs->is_dir) { 430 PetscInt n_com,n_dir; 431 n_com = 0; 432 if (sub_schurs->is_vertices) { 433 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_com);CHKERRQ(ierr); 434 } 435 n_dir = 0; 436 if (sub_schurs->is_dir) { 437 ierr = ISGetLocalSize(sub_schurs->is_dir,&n_dir);CHKERRQ(ierr); 438 } 439 if (!deluxe_ctx->n_simple) { 440 deluxe_ctx->n_simple = n_dir + n_com; 441 ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 442 if (sub_schurs->is_vertices) { 443 PetscInt nmap; 444 const PetscInt *idxs; 445 446 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 447 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 448 if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %D != %D",nmap,n_com); 449 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 450 } 451 if (sub_schurs->is_dir) { 452 PetscInt nmap; 453 const PetscInt *idxs; 454 455 ierr = ISGetIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr); 456 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);CHKERRQ(ierr); 457 if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %D != %D",nmap,n_dir); 458 ierr = ISRestoreIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr); 459 } 460 ierr = PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 461 } else { 462 if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %D is different from the previous one computed %D",n_dir + n_com,deluxe_ctx->n_simple); 463 } 464 } else { 465 deluxe_ctx->n_simple = 0; 466 deluxe_ctx->idx_simple_B = 0; 467 } 468 PetscFunctionReturn(0); 469 } 470 471 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc) 472 { 473 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 474 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 475 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 476 PetscScalar *matdata,*matdata2; 477 PetscInt i,max_subset_size,cum,cum2; 478 const PetscInt *idxs; 479 PetscBool newsetup = PETSC_FALSE; 480 PetscErrorCode ierr; 481 482 PetscFunctionBegin; 483 if (!sub_schurs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Missing PCBDDCSubSchurs"); 484 if (!sub_schurs->n_subs) PetscFunctionReturn(0); 485 486 /* Allocate arrays for subproblems */ 487 if (!deluxe_ctx->seq_n) { 488 deluxe_ctx->seq_n = sub_schurs->n_subs; 489 ierr = PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr); 490 newsetup = PETSC_TRUE; 491 } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %D is different from the sub_schurs %D",deluxe_ctx->seq_n,sub_schurs->n_subs); 492 493 /* the change of basis is just a reference to sub_schurs->change (if any) */ 494 deluxe_ctx->change = sub_schurs->change; 495 deluxe_ctx->change_with_qr = sub_schurs->change_with_qr; 496 497 /* Create objects for deluxe */ 498 max_subset_size = 0; 499 for (i=0;i<sub_schurs->n_subs;i++) { 500 PetscInt subset_size; 501 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 502 max_subset_size = PetscMax(subset_size,max_subset_size); 503 } 504 if (newsetup) { 505 ierr = PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);CHKERRQ(ierr); 506 } 507 cum = cum2 = 0; 508 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr); 509 ierr = MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr); 510 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr); 511 for (i=0;i<deluxe_ctx->seq_n;i++) { 512 PetscInt subset_size; 513 514 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 515 if (newsetup) { 516 IS sub; 517 /* work vectors */ 518 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 519 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 520 521 /* scatters */ 522 ierr = ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);CHKERRQ(ierr); 523 ierr = VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr); 524 ierr = ISDestroy(&sub);CHKERRQ(ierr); 525 } 526 527 /* S_E_j */ 528 ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 529 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 530 531 /* \sum_k S^k_E_j */ 532 ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 533 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 534 ierr = MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef);CHKERRQ(ierr); 535 ierr = MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian);CHKERRQ(ierr); 536 if (sub_schurs->is_hermitian) { 537 ierr = MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);CHKERRQ(ierr); 538 } else { 539 ierr = MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);CHKERRQ(ierr); 540 } 541 if (pcbddc->deluxe_singlemat) { 542 Mat X,Y; 543 if (!sub_schurs->is_hermitian) { 544 ierr = MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);CHKERRQ(ierr); 545 } else { 546 ierr = PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 547 X = deluxe_ctx->seq_mat[i]; 548 } 549 ierr = MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);CHKERRQ(ierr); 550 if (!sub_schurs->is_hermitian) { 551 ierr = PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr); 552 } else { 553 ierr = MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr); 554 } 555 556 ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 557 ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 558 ierr = MatDestroy(&X);CHKERRQ(ierr); 559 if (deluxe_ctx->change) { 560 Mat C,CY; 561 562 if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis"); 563 ierr = KSPGetOperators(deluxe_ctx->change[i],&C,NULL);CHKERRQ(ierr); 564 ierr = MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);CHKERRQ(ierr); 565 ierr = MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr); 566 ierr = MatDestroy(&CY);CHKERRQ(ierr); 567 } 568 ierr = MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);CHKERRQ(ierr); 569 deluxe_ctx->seq_mat[i] = Y; 570 } 571 cum += subset_size; 572 cum2 += subset_size*subset_size; 573 } 574 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr); 575 ierr = MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr); 576 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr); 577 if (pcbddc->deluxe_singlemat) { 578 deluxe_ctx->change = NULL; 579 deluxe_ctx->change_with_qr = PETSC_FALSE; 580 } 581 582 if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) { 583 for (i=0;i<deluxe_ctx->seq_n;i++) { 584 if (newsetup) { 585 PC pc; 586 587 ierr = KSPGetPC(deluxe_ctx->change[i],&pc);CHKERRQ(ierr); 588 ierr = PCSetType(pc,PCLU);CHKERRQ(ierr); 589 ierr = KSPSetFromOptions(deluxe_ctx->change[i]);CHKERRQ(ierr); 590 } 591 ierr = KSPSetUp(deluxe_ctx->change[i]);CHKERRQ(ierr); 592 } 593 } 594 PetscFunctionReturn(0); 595 } 596