1 2 /* 3 Include files needed for the PBJacobi preconditioner: 4 pcimpl.h - private include file intended for use by all preconditioners 5 */ 6 7 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 8 9 /* 10 Private context (data structure) for the PBJacobi preconditioner. 11 */ 12 typedef struct { 13 const MatScalar *diag; 14 PetscInt bs,mbs; 15 } PC_PBJacobi; 16 17 static PetscErrorCode PCApply_PBJacobi_1(PC pc,Vec x,Vec y) 18 { 19 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 20 PetscErrorCode ierr; 21 PetscInt i,m = jac->mbs; 22 const MatScalar *diag = jac->diag; 23 const PetscScalar *xx; 24 PetscScalar *yy; 25 26 PetscFunctionBegin; 27 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 28 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 29 for (i=0; i<m; i++) yy[i] = diag[i]*xx[i]; 30 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 31 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 32 ierr = PetscLogFlops(m);CHKERRQ(ierr); 33 PetscFunctionReturn(0); 34 } 35 36 static PetscErrorCode PCApply_PBJacobi_2(PC pc,Vec x,Vec y) 37 { 38 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 39 PetscErrorCode ierr; 40 PetscInt i,m = jac->mbs; 41 const MatScalar *diag = jac->diag; 42 PetscScalar x0,x1,*yy; 43 const PetscScalar *xx; 44 45 PetscFunctionBegin; 46 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 47 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 48 for (i=0; i<m; i++) { 49 x0 = xx[2*i]; x1 = xx[2*i+1]; 50 yy[2*i] = diag[0]*x0 + diag[2]*x1; 51 yy[2*i+1] = diag[1]*x0 + diag[3]*x1; 52 diag += 4; 53 } 54 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 55 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 56 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 57 PetscFunctionReturn(0); 58 } 59 static PetscErrorCode PCApply_PBJacobi_3(PC pc,Vec x,Vec y) 60 { 61 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 62 PetscErrorCode ierr; 63 PetscInt i,m = jac->mbs; 64 const MatScalar *diag = jac->diag; 65 PetscScalar x0,x1,x2,*yy; 66 const PetscScalar *xx; 67 68 PetscFunctionBegin; 69 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 70 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 71 for (i=0; i<m; i++) { 72 x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2]; 73 74 yy[3*i] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 75 yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 76 yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 77 diag += 9; 78 } 79 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 80 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 81 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 82 PetscFunctionReturn(0); 83 } 84 static PetscErrorCode PCApply_PBJacobi_4(PC pc,Vec x,Vec y) 85 { 86 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 87 PetscErrorCode ierr; 88 PetscInt i,m = jac->mbs; 89 const MatScalar *diag = jac->diag; 90 PetscScalar x0,x1,x2,x3,*yy; 91 const PetscScalar *xx; 92 93 PetscFunctionBegin; 94 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 95 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 96 for (i=0; i<m; i++) { 97 x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3]; 98 99 yy[4*i] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 100 yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 101 yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3; 102 yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3; 103 diag += 16; 104 } 105 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 106 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 107 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 108 PetscFunctionReturn(0); 109 } 110 static PetscErrorCode PCApply_PBJacobi_5(PC pc,Vec x,Vec y) 111 { 112 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 113 PetscErrorCode ierr; 114 PetscInt i,m = jac->mbs; 115 const MatScalar *diag = jac->diag; 116 PetscScalar x0,x1,x2,x3,x4,*yy; 117 const PetscScalar *xx; 118 119 PetscFunctionBegin; 120 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 121 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 122 for (i=0; i<m; i++) { 123 x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4]; 124 125 yy[5*i] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 126 yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 127 yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 128 yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 129 yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 130 diag += 25; 131 } 132 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 133 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 134 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 static PetscErrorCode PCApply_PBJacobi_6(PC pc,Vec x,Vec y) 138 { 139 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 140 PetscErrorCode ierr; 141 PetscInt i,m = jac->mbs; 142 const MatScalar *diag = jac->diag; 143 PetscScalar x0,x1,x2,x3,x4,x5,*yy; 144 const PetscScalar *xx; 145 146 PetscFunctionBegin; 147 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 148 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 149 for (i=0; i<m; i++) { 150 x0 = xx[6*i]; x1 = xx[6*i+1]; x2 = xx[6*i+2]; x3 = xx[6*i+3]; x4 = xx[6*i+4]; x5 = xx[6*i+5]; 151 152 yy[6*i] = diag[0]*x0 + diag[6]*x1 + diag[12]*x2 + diag[18]*x3 + diag[24]*x4 + diag[30]*x5; 153 yy[6*i+1] = diag[1]*x0 + diag[7]*x1 + diag[13]*x2 + diag[19]*x3 + diag[25]*x4 + diag[31]*x5; 154 yy[6*i+2] = diag[2]*x0 + diag[8]*x1 + diag[14]*x2 + diag[20]*x3 + diag[26]*x4 + diag[32]*x5; 155 yy[6*i+3] = diag[3]*x0 + diag[9]*x1 + diag[15]*x2 + diag[21]*x3 + diag[27]*x4 + diag[33]*x5; 156 yy[6*i+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2 + diag[22]*x3 + diag[28]*x4 + diag[34]*x5; 157 yy[6*i+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2 + diag[23]*x3 + diag[29]*x4 + diag[35]*x5; 158 diag += 36; 159 } 160 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 161 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 162 ierr = PetscLogFlops(66.0*m);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 static PetscErrorCode PCApply_PBJacobi_7(PC pc,Vec x,Vec y) 166 { 167 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 168 PetscErrorCode ierr; 169 PetscInt i,m = jac->mbs; 170 const MatScalar *diag = jac->diag; 171 PetscScalar x0,x1,x2,x3,x4,x5,x6,*yy; 172 const PetscScalar *xx; 173 174 PetscFunctionBegin; 175 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 176 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 177 for (i=0; i<m; i++) { 178 x0 = xx[7*i]; x1 = xx[7*i+1]; x2 = xx[7*i+2]; x3 = xx[7*i+3]; x4 = xx[7*i+4]; x5 = xx[7*i+5]; x6 = xx[7*i+6]; 179 180 yy[7*i] = diag[0]*x0 + diag[7]*x1 + diag[14]*x2 + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6; 181 yy[7*i+1] = diag[1]*x0 + diag[8]*x1 + diag[15]*x2 + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6; 182 yy[7*i+2] = diag[2]*x0 + diag[9]*x1 + diag[16]*x2 + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6; 183 yy[7*i+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2 + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6; 184 yy[7*i+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2 + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6; 185 yy[7*i+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2 + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6; 186 yy[7*i+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2 + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6; 187 diag += 49; 188 } 189 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 190 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 191 ierr = PetscLogFlops(91.0*m);CHKERRQ(ierr); /* 2*bs2 - bs */ 192 PetscFunctionReturn(0); 193 } 194 static PetscErrorCode PCApply_PBJacobi_N(PC pc,Vec x,Vec y) 195 { 196 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 197 PetscErrorCode ierr; 198 PetscInt i,ib,jb; 199 const PetscInt m = jac->mbs; 200 const PetscInt bs = jac->bs; 201 const MatScalar *diag = jac->diag; 202 PetscScalar *yy; 203 const PetscScalar *xx; 204 205 PetscFunctionBegin; 206 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 207 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 208 for (i=0; i<m; i++) { 209 for (ib=0; ib<bs; ib++) { 210 PetscScalar rowsum = 0; 211 for (jb=0; jb<bs; jb++) { 212 rowsum += diag[ib+jb*bs] * xx[bs*i+jb]; 213 } 214 yy[bs*i+ib] = rowsum; 215 } 216 diag += bs*bs; 217 } 218 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 219 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 220 ierr = PetscLogFlops((2.0*bs*bs-bs)*m);CHKERRQ(ierr); /* 2*bs2 - bs */ 221 PetscFunctionReturn(0); 222 } 223 224 static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc,Vec x,Vec y) 225 { 226 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 227 PetscErrorCode ierr; 228 PetscInt i,j,k,m = jac->mbs,bs=jac->bs; 229 const MatScalar *diag = jac->diag; 230 const PetscScalar *xx; 231 PetscScalar *yy; 232 233 PetscFunctionBegin; 234 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 235 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 236 for (i=0; i<m; i++) { 237 for (j=0; j<bs; j++) yy[i*bs+j] = 0.; 238 for (j=0; j<bs; j++) { 239 for (k=0; k<bs; k++) { 240 yy[i*bs+k] += diag[k*bs+j]*xx[i*bs+j]; 241 } 242 } 243 diag += bs*bs; 244 } 245 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 246 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 247 ierr = PetscLogFlops(m*bs*(2*bs-1));CHKERRQ(ierr); 248 PetscFunctionReturn(0); 249 } 250 251 /* -------------------------------------------------------------------------- */ 252 static PetscErrorCode PCSetUp_PBJacobi(PC pc) 253 { 254 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 255 PetscErrorCode ierr; 256 Mat A = pc->pmat; 257 MatFactorError err; 258 PetscInt nlocal; 259 260 PetscFunctionBegin; 261 ierr = MatInvertBlockDiagonal(A,&jac->diag);CHKERRQ(ierr); 262 ierr = MatFactorGetError(A,&err);CHKERRQ(ierr); 263 if (err) pc->failedreason = (PCFailedReason)err; 264 265 ierr = MatGetBlockSize(A,&jac->bs);CHKERRQ(ierr); 266 ierr = MatGetLocalSize(A,&nlocal,NULL);CHKERRQ(ierr); 267 jac->mbs = nlocal/jac->bs; 268 switch (jac->bs) { 269 case 1: 270 pc->ops->apply = PCApply_PBJacobi_1; 271 break; 272 case 2: 273 pc->ops->apply = PCApply_PBJacobi_2; 274 break; 275 case 3: 276 pc->ops->apply = PCApply_PBJacobi_3; 277 break; 278 case 4: 279 pc->ops->apply = PCApply_PBJacobi_4; 280 break; 281 case 5: 282 pc->ops->apply = PCApply_PBJacobi_5; 283 break; 284 case 6: 285 pc->ops->apply = PCApply_PBJacobi_6; 286 break; 287 case 7: 288 pc->ops->apply = PCApply_PBJacobi_7; 289 break; 290 default: 291 pc->ops->apply = PCApply_PBJacobi_N; 292 break; 293 } 294 pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N; 295 PetscFunctionReturn(0); 296 } 297 /* -------------------------------------------------------------------------- */ 298 static PetscErrorCode PCDestroy_PBJacobi(PC pc) 299 { 300 PetscErrorCode ierr; 301 302 PetscFunctionBegin; 303 /* 304 Free the private data structure that was hanging off the PC 305 */ 306 ierr = PetscFree(pc->data);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 static PetscErrorCode PCView_PBJacobi(PC pc,PetscViewer viewer) 311 { 312 PetscErrorCode ierr; 313 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 314 PetscBool iascii; 315 316 PetscFunctionBegin; 317 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 318 if (iascii) { 319 ierr = PetscViewerASCIIPrintf(viewer," point-block size %D\n",jac->bs);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 /* -------------------------------------------------------------------------- */ 325 /*MC 326 PCPBJACOBI - Point block Jacobi preconditioner 327 328 Notes: 329 See PCJACOBI for point Jacobi preconditioning 330 331 This works for AIJ and BAIJ matrices and uses the blocksize provided to the matrix 332 333 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 334 is detected a PETSc error is generated. 335 336 Developer Notes: 337 This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow 338 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 339 terminating KSP with a KSP_DIVERGED_NANORIF allowing 340 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 341 342 Developer Note: Perhaps should provide an option that allows generation of a valid preconditioner 343 even if a block is singular as the PCJACOBI does. 344 345 Level: beginner 346 347 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI 348 349 M*/ 350 351 PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc) 352 { 353 PC_PBJacobi *jac; 354 PetscErrorCode ierr; 355 356 PetscFunctionBegin; 357 /* 358 Creates the private data structure for this preconditioner and 359 attach it to the PC object. 360 */ 361 ierr = PetscNewLog(pc,&jac);CHKERRQ(ierr); 362 pc->data = (void*)jac; 363 364 /* 365 Initialize the pointers to vectors to ZERO; these will be used to store 366 diagonal entries of the matrix for fast preconditioner application. 367 */ 368 jac->diag = NULL; 369 370 /* 371 Set the pointers for the functions that are provided above. 372 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 373 are called, they will automatically call these functions. Note we 374 choose not to provide a couple of these functions since they are 375 not needed. 376 */ 377 pc->ops->apply = NULL; /*set depending on the block size */ 378 pc->ops->applytranspose = NULL; 379 pc->ops->setup = PCSetUp_PBJacobi; 380 pc->ops->destroy = PCDestroy_PBJacobi; 381 pc->ops->setfromoptions = NULL; 382 pc->ops->view = PCView_PBJacobi; 383 pc->ops->applyrichardson = NULL; 384 pc->ops->applysymmetricleft = NULL; 385 pc->ops->applysymmetricright = NULL; 386 PetscFunctionReturn(0); 387 } 388