1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 6 /* prototypes for deluxe functions */ 7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC); 8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC); 9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC); 10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC); 11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling); 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "PCBDDCMatTransposeMatSolve_SeqDense" 15 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X) 16 { 17 Mat_SeqDense *mat = (Mat_SeqDense*)A->data; 18 PetscErrorCode ierr; 19 PetscScalar *b,*x; 20 PetscInt n; 21 PetscBLASInt nrhs,info,m; 22 PetscBool flg; 23 24 PetscFunctionBegin; 25 ierr = PetscBLASIntCast(A->rmap->n,&m);CHKERRQ(ierr); 26 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 27 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 28 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 29 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 30 31 ierr = MatGetSize(B,NULL,&n);CHKERRQ(ierr); 32 ierr = PetscBLASIntCast(n,&nrhs);CHKERRQ(ierr); 33 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 34 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 35 36 ierr = PetscMemcpy(x,b,m*nrhs*sizeof(PetscScalar));CHKERRQ(ierr); 37 38 if (A->factortype == MAT_FACTOR_LU) { 39 #if defined(PETSC_MISSING_LAPACK_GETRS) 40 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable."); 41 #else 42 PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info)); 43 if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve"); 44 #endif 45 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 46 #if defined(PETSC_MISSING_LAPACK_POTRS) 47 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRS - Lapack routine is unavailable."); 48 #else 49 PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("L",&m,&nrhs,mat->v,&mat->lda,x,&m,&info)); 50 if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"POTRS Bad solve"); 51 #endif 52 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be factored to solve"); 53 54 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 55 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 56 ierr = PetscLogFlops(nrhs*(2.0*m*m - m));CHKERRQ(ierr); 57 PetscFunctionReturn(0); 58 } 59 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "PCBDDCScalingExtension_Basic" 63 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) 64 { 65 PC_IS* pcis = (PC_IS*)pc->data; 66 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 67 PetscErrorCode ierr; 68 69 PetscFunctionBegin; 70 /* Apply partition of unity */ 71 ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr); 72 ierr = VecSet(global_vector,0.0);CHKERRQ(ierr); 73 ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 74 ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 78 #undef __FUNCT__ 79 #define __FUNCT__ "PCBDDCScalingExtension_Deluxe" 80 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) 81 { 82 PC_IS* pcis=(PC_IS*)pc->data; 83 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 84 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr); 89 ierr = VecSet(y,0.0);CHKERRQ(ierr); 90 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 91 PetscInt i; 92 const PetscScalar *array_x,*array_D; 93 PetscScalar *array; 94 ierr = VecGetArrayRead(x,&array_x);CHKERRQ(ierr); 95 ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 96 ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr); 97 for (i=0;i<deluxe_ctx->n_simple;i++) { 98 array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]]; 99 } 100 ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr); 101 ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 102 ierr = VecRestoreArrayRead(x,&array_x);CHKERRQ(ierr); 103 } 104 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 105 if (deluxe_ctx->seq_mat) { 106 PetscInt i; 107 for (i=0;i<deluxe_ctx->seq_n;i++) { 108 if (deluxe_ctx->change) { 109 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 110 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 111 if (deluxe_ctx->change_with_qr) { 112 Mat change; 113 114 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 115 ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 116 } else { 117 ierr = KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 118 } 119 } else { 120 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 121 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 122 } 123 ierr = MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 124 if (deluxe_ctx->seq_mat_inv_sum[i]) { 125 PetscScalar *x; 126 127 ierr = VecGetArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr); 128 ierr = VecPlaceArray(deluxe_ctx->seq_work1[i],x);CHKERRQ(ierr); 129 ierr = VecRestoreArray(deluxe_ctx->seq_work2[i],&x);CHKERRQ(ierr); 130 ierr = MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 131 ierr = VecResetArray(deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 132 } 133 if (deluxe_ctx->change) { 134 Mat change; 135 136 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 137 ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 138 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 139 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 140 } else { 141 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 142 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 143 } 144 } 145 } 146 /* put local boundary part in global vector */ 147 ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 148 ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 #undef __FUNCT__ 153 #define __FUNCT__ "PCBDDCScalingExtension" 154 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) 155 { 156 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 157 PetscErrorCode ierr; 158 159 PetscFunctionBegin; 160 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 161 PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2); 162 PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3); 163 if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n"); 164 ierr = PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr); 165 PetscFunctionReturn(0); 166 } 167 168 #undef __FUNCT__ 169 #define __FUNCT__ "PCBDDCScalingRestriction_Basic" 170 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) 171 { 172 PetscErrorCode ierr; 173 PC_IS *pcis = (PC_IS*)pc->data; 174 175 PetscFunctionBegin; 176 ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 177 ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 178 /* Apply partition of unity */ 179 ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr); 180 PetscFunctionReturn(0); 181 } 182 183 #undef __FUNCT__ 184 #define __FUNCT__ "PCBDDCScalingRestriction_Deluxe" 185 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) 186 { 187 PC_IS* pcis=(PC_IS*)pc->data; 188 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 189 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 190 PetscErrorCode ierr; 191 192 PetscFunctionBegin; 193 /* get local boundary part of global vector */ 194 ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 195 ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 196 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 197 PetscInt i; 198 PetscScalar *array_y; 199 const PetscScalar *array_D; 200 ierr = VecGetArray(y,&array_y);CHKERRQ(ierr); 201 ierr = VecGetArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 202 for (i=0;i<deluxe_ctx->n_simple;i++) { 203 array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]]; 204 } 205 ierr = VecRestoreArrayRead(pcis->D,&array_D);CHKERRQ(ierr); 206 ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr); 207 } 208 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 209 if (deluxe_ctx->seq_mat) { 210 PetscInt i; 211 for (i=0;i<deluxe_ctx->seq_n;i++) { 212 if (deluxe_ctx->change) { 213 Mat change; 214 215 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 216 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 217 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 218 ierr = MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 219 } else { 220 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 221 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 222 } 223 if (deluxe_ctx->seq_mat_inv_sum[i]) { 224 PetscScalar *x; 225 226 ierr = VecGetArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr); 227 ierr = VecPlaceArray(deluxe_ctx->seq_work2[i],x);CHKERRQ(ierr); 228 ierr = VecRestoreArray(deluxe_ctx->seq_work1[i],&x);CHKERRQ(ierr); 229 ierr = MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 230 ierr = VecResetArray(deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 231 } 232 ierr = MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 233 if (deluxe_ctx->change) { 234 if (deluxe_ctx->change_with_qr) { 235 Mat change; 236 237 ierr = KSPGetOperators(deluxe_ctx->change[i],&change,NULL);CHKERRQ(ierr); 238 ierr = MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 239 } else { 240 ierr = KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 241 } 242 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 243 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 244 } else { 245 ierr = VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 246 ierr = VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 247 } 248 } 249 } 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "PCBDDCScalingRestriction" 255 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) 256 { 257 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 258 PetscErrorCode ierr; 259 260 PetscFunctionBegin; 261 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 262 PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2); 263 PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3); 264 if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n"); 265 ierr = PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr); 266 PetscFunctionReturn(0); 267 } 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "PCBDDCScalingSetUp" 271 PetscErrorCode PCBDDCScalingSetUp(PC pc) 272 { 273 PC_IS* pcis=(PC_IS*)pc->data; 274 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 279 /* create work vector for the operator */ 280 ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr); 281 ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr); 282 /* always rebuild pcis->D */ 283 if (pcis->use_stiffness_scaling) { 284 ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr); 285 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 286 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 287 } 288 ierr = VecCopy(pcis->D,pcis->vec1_B);CHKERRQ(ierr); 289 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 290 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 291 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 292 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 293 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 294 ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 295 /* now setup */ 296 if (pcbddc->use_deluxe_scaling) { 297 if (!pcbddc->deluxe_ctx) { 298 ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr); 299 } 300 ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr); 301 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr); 302 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr); 303 } else { 304 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr); 305 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr); 306 } 307 308 /* test */ 309 if (pcbddc->dbg_flag) { 310 Mat B0_B = NULL; 311 Vec B0_Bv = NULL, B0_Bv2 = NULL; 312 Vec vec2_global; 313 PetscViewer viewer = pcbddc->dbg_viewer; 314 PetscReal error; 315 316 /* extension -> from local to parallel */ 317 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 318 ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr); 319 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 320 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 321 ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr); 322 ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr); 323 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 324 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 325 if (pcbddc->benign_n) { 326 IS is_dummy; 327 328 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 329 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 330 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 331 ierr = MatCreateVecs(B0_B,NULL,&B0_Bv);CHKERRQ(ierr); 332 ierr = VecDuplicate(B0_Bv,&B0_Bv2);CHKERRQ(ierr); 333 ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv);CHKERRQ(ierr); 334 } 335 ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr); 336 if (pcbddc->benign_saddle_point) { 337 PetscReal errorl = 0.; 338 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 339 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 340 if (pcbddc->benign_n) { 341 ierr = MatMult(B0_B,pcis->vec1_B,B0_Bv2);CHKERRQ(ierr); 342 ierr = VecAXPY(B0_Bv,-1.0,B0_Bv2);CHKERRQ(ierr); 343 ierr = VecNorm(B0_Bv,NORM_INFINITY,&errorl);CHKERRQ(ierr); 344 } 345 ierr = MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 346 ierr = PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);CHKERRQ(ierr); 347 } 348 ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr); 349 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr); 350 ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr); 351 ierr = VecDestroy(&vec2_global);CHKERRQ(ierr); 352 353 /* restriction -> from parallel to local */ 354 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 355 ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr); 356 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 357 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 358 ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr); 359 ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr); 360 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 361 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 362 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr); 363 ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr); 364 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 365 ierr = VecDestroy(&B0_Bv);CHKERRQ(ierr); 366 ierr = VecDestroy(&B0_Bv2);CHKERRQ(ierr); 367 } 368 PetscFunctionReturn(0); 369 } 370 371 #undef __FUNCT__ 372 #define __FUNCT__ "PCBDDCScalingDestroy" 373 PetscErrorCode PCBDDCScalingDestroy(PC pc) 374 { 375 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 376 PetscErrorCode ierr; 377 378 PetscFunctionBegin; 379 if (pcbddc->deluxe_ctx) { 380 ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr); 381 } 382 ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr); 383 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr); 384 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCScalingCreate_Deluxe" 390 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) 391 { 392 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 393 PCBDDCDeluxeScaling deluxe_ctx; 394 PetscErrorCode ierr; 395 396 PetscFunctionBegin; 397 ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr); 398 pcbddc->deluxe_ctx = deluxe_ctx; 399 PetscFunctionReturn(0); 400 } 401 402 #undef __FUNCT__ 403 #define __FUNCT__ "PCBDDCScalingDestroy_Deluxe" 404 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) 405 { 406 PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 407 PetscErrorCode ierr; 408 409 PetscFunctionBegin; 410 ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr); 411 ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr); 412 PetscFunctionReturn(0); 413 } 414 415 #undef __FUNCT__ 416 #define __FUNCT__ "PCBDDCScalingReset_Deluxe_Solvers" 417 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) 418 { 419 PetscInt i; 420 PetscErrorCode ierr; 421 422 PetscFunctionBegin; 423 ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 424 deluxe_ctx->n_simple = 0; 425 for (i=0;i<deluxe_ctx->seq_n;i++) { 426 ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr); 427 ierr = VecDestroy(&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 428 ierr = VecDestroy(&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 429 ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 430 ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 431 } 432 ierr = PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr); 433 ierr = PetscFree(deluxe_ctx->workspace);CHKERRQ(ierr); 434 deluxe_ctx->seq_n = 0; 435 PetscFunctionReturn(0); 436 } 437 438 #undef __FUNCT__ 439 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe" 440 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) 441 { 442 PC_IS *pcis=(PC_IS*)pc->data; 443 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 444 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 445 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 446 PetscErrorCode ierr; 447 448 PetscFunctionBegin; 449 /* reset data structures if the topology has changed */ 450 if (pcbddc->recompute_topography) { 451 ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr); 452 } 453 454 /* Compute data structures to solve sequential problems */ 455 ierr = PCBDDCScalingSetUp_Deluxe_Private(pc);CHKERRQ(ierr); 456 457 /* diagonal scaling on interface dofs not contained in cc */ 458 if (sub_schurs->is_vertices || sub_schurs->is_dir) { 459 PetscInt n_com,n_dir; 460 n_com = 0; 461 if (sub_schurs->is_vertices) { 462 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_com);CHKERRQ(ierr); 463 } 464 n_dir = 0; 465 if (sub_schurs->is_dir) { 466 ierr = ISGetLocalSize(sub_schurs->is_dir,&n_dir);CHKERRQ(ierr); 467 } 468 if (!deluxe_ctx->n_simple) { 469 deluxe_ctx->n_simple = n_dir + n_com; 470 ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 471 if (sub_schurs->is_vertices) { 472 PetscInt nmap; 473 const PetscInt *idxs; 474 475 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 476 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 477 if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %d != %d",nmap,n_com); 478 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 479 } 480 if (sub_schurs->is_dir) { 481 PetscInt nmap; 482 const PetscInt *idxs; 483 484 ierr = ISGetIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr); 485 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);CHKERRQ(ierr); 486 if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %d != %d",nmap,n_dir); 487 ierr = ISRestoreIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr); 488 } 489 ierr = PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 490 } else { 491 if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %d is different from the previous one computed %d",n_dir + n_com,deluxe_ctx->n_simple); 492 } 493 } else { 494 deluxe_ctx->n_simple = 0; 495 deluxe_ctx->idx_simple_B = 0; 496 } 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Private" 502 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc) 503 { 504 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 505 PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 506 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 507 PetscScalar *matdata,*matdata2; 508 PetscInt i,max_subset_size,cum,cum2; 509 const PetscInt *idxs; 510 PetscBool newsetup = PETSC_FALSE; 511 PetscErrorCode ierr; 512 513 PetscFunctionBegin; 514 if (!sub_schurs->n_subs) { 515 PetscFunctionReturn(0); 516 } 517 518 /* Allocate arrays for subproblems */ 519 if (!deluxe_ctx->seq_n) { 520 deluxe_ctx->seq_n = sub_schurs->n_subs; 521 ierr = PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr); 522 newsetup = PETSC_TRUE; 523 } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) { 524 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %d is different from the sub_schurs %d",deluxe_ctx->seq_n,sub_schurs->n_subs); 525 } 526 deluxe_ctx->change_with_qr = sub_schurs->change_with_qr; 527 528 /* Create objects for deluxe */ 529 max_subset_size = 0; 530 for (i=0;i<sub_schurs->n_subs;i++) { 531 PetscInt subset_size; 532 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 533 max_subset_size = PetscMax(subset_size,max_subset_size); 534 } 535 if (newsetup) { 536 ierr = PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);CHKERRQ(ierr); 537 } 538 cum = cum2 = 0; 539 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr); 540 ierr = MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr); 541 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr); 542 for (i=0;i<deluxe_ctx->seq_n;i++) { 543 PetscInt subset_size; 544 545 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 546 if (newsetup) { 547 IS sub; 548 /* work vectors */ 549 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); 550 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); 551 552 /* scatters */ 553 ierr = ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);CHKERRQ(ierr); 554 ierr = VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr); 555 ierr = ISDestroy(&sub);CHKERRQ(ierr); 556 } 557 558 /* S_E_j */ 559 ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 560 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 561 562 /* \sum_k S^k_E_j */ 563 ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 564 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 565 566 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 567 ierr = MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);CHKERRQ(ierr); 568 } else { 569 ierr = MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);CHKERRQ(ierr); 570 } 571 if (pcbddc->deluxe_singlemat) { 572 Mat X,Y; 573 574 if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) { 575 ierr = MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);CHKERRQ(ierr); 576 } else { 577 ierr = PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 578 X = deluxe_ctx->seq_mat[i]; 579 } 580 ierr = MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);CHKERRQ(ierr); 581 if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) { 582 ierr = PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr); 583 } else { 584 ierr = MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr); 585 } 586 587 ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); 588 ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); 589 ierr = MatDestroy(&X);CHKERRQ(ierr); 590 ierr = MatTranspose(Y,MAT_REUSE_MATRIX,&Y);CHKERRQ(ierr); 591 deluxe_ctx->seq_mat[i] = Y; 592 } 593 cum += subset_size; 594 cum2 += subset_size*subset_size; 595 } 596 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr); 597 ierr = MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr); 598 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr); 599 600 /* the change of basis is just a reference to sub_schurs->change (if any) */ 601 deluxe_ctx->change = sub_schurs->change; 602 if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) { 603 for (i=0;i<deluxe_ctx->seq_n;i++) { 604 if (newsetup) { 605 PC pc; 606 607 ierr = KSPGetPC(deluxe_ctx->change[i],&pc);CHKERRQ(ierr); 608 ierr = PCSetType(pc,PCLU);CHKERRQ(ierr); 609 ierr = KSPSetFromOptions(deluxe_ctx->change[i]);CHKERRQ(ierr); 610 } 611 ierr = KSPSetUp(deluxe_ctx->change[i]);CHKERRQ(ierr); 612 } 613 } 614 PetscFunctionReturn(0); 615 } 616