1 2 /* 3 Include files needed for the PBJacobi preconditioner: 4 pcimpl.h - private include file intended for use by all preconditioners 5 */ 6 7 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 8 9 /* 10 Private context (data structure) for the PBJacobi preconditioner. 11 */ 12 typedef struct { 13 const MatScalar *diag; 14 PetscInt bs,mbs; 15 } PC_PBJacobi; 16 17 18 static PetscErrorCode PCApply_PBJacobi_1(PC pc,Vec x,Vec y) 19 { 20 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 21 PetscErrorCode ierr; 22 PetscInt i,m = jac->mbs; 23 const MatScalar *diag = jac->diag; 24 const PetscScalar *xx; 25 PetscScalar *yy; 26 27 PetscFunctionBegin; 28 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 29 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 30 for (i=0; i<m; i++) yy[i] = diag[i]*xx[i]; 31 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 32 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 33 ierr = PetscLogFlops(m);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 37 static PetscErrorCode PCApply_PBJacobi_2(PC pc,Vec x,Vec y) 38 { 39 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 40 PetscErrorCode ierr; 41 PetscInt i,m = jac->mbs; 42 const MatScalar *diag = jac->diag; 43 PetscScalar x0,x1,*yy; 44 const PetscScalar *xx; 45 46 PetscFunctionBegin; 47 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 48 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 49 for (i=0; i<m; i++) { 50 x0 = xx[2*i]; x1 = xx[2*i+1]; 51 yy[2*i] = diag[0]*x0 + diag[2]*x1; 52 yy[2*i+1] = diag[1]*x0 + diag[3]*x1; 53 diag += 4; 54 } 55 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 56 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 57 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 static PetscErrorCode PCApply_PBJacobi_3(PC pc,Vec x,Vec y) 61 { 62 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 63 PetscErrorCode ierr; 64 PetscInt i,m = jac->mbs; 65 const MatScalar *diag = jac->diag; 66 PetscScalar x0,x1,x2,*yy; 67 const PetscScalar *xx; 68 69 PetscFunctionBegin; 70 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 71 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 72 for (i=0; i<m; i++) { 73 x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2]; 74 75 yy[3*i] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 76 yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 77 yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 78 diag += 9; 79 } 80 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 81 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 82 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 static PetscErrorCode PCApply_PBJacobi_4(PC pc,Vec x,Vec y) 86 { 87 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 88 PetscErrorCode ierr; 89 PetscInt i,m = jac->mbs; 90 const MatScalar *diag = jac->diag; 91 PetscScalar x0,x1,x2,x3,*yy; 92 const PetscScalar *xx; 93 94 PetscFunctionBegin; 95 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 96 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 97 for (i=0; i<m; i++) { 98 x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3]; 99 100 yy[4*i] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 101 yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 102 yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3; 103 yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3; 104 diag += 16; 105 } 106 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 107 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 108 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 109 PetscFunctionReturn(0); 110 } 111 static PetscErrorCode PCApply_PBJacobi_5(PC pc,Vec x,Vec y) 112 { 113 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 114 PetscErrorCode ierr; 115 PetscInt i,m = jac->mbs; 116 const MatScalar *diag = jac->diag; 117 PetscScalar x0,x1,x2,x3,x4,*yy; 118 const PetscScalar *xx; 119 120 PetscFunctionBegin; 121 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 122 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 123 for (i=0; i<m; i++) { 124 x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4]; 125 126 yy[5*i] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 127 yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 128 yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 129 yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 130 yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 131 diag += 25; 132 } 133 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 134 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 135 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 136 PetscFunctionReturn(0); 137 } 138 static PetscErrorCode PCApply_PBJacobi_6(PC pc,Vec x,Vec y) 139 { 140 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 141 PetscErrorCode ierr; 142 PetscInt i,m = jac->mbs; 143 const MatScalar *diag = jac->diag; 144 PetscScalar x0,x1,x2,x3,x4,x5,*yy; 145 const PetscScalar *xx; 146 147 PetscFunctionBegin; 148 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 149 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 150 for (i=0; i<m; i++) { 151 x0 = xx[6*i]; x1 = xx[6*i+1]; x2 = xx[6*i+2]; x3 = xx[6*i+3]; x4 = xx[6*i+4]; x5 = xx[6*i+5]; 152 153 yy[6*i] = diag[0]*x0 + diag[6]*x1 + diag[12]*x2 + diag[18]*x3 + diag[24]*x4 + diag[30]*x5; 154 yy[6*i+1] = diag[1]*x0 + diag[7]*x1 + diag[13]*x2 + diag[19]*x3 + diag[25]*x4 + diag[31]*x5; 155 yy[6*i+2] = diag[2]*x0 + diag[8]*x1 + diag[14]*x2 + diag[20]*x3 + diag[26]*x4 + diag[32]*x5; 156 yy[6*i+3] = diag[3]*x0 + diag[9]*x1 + diag[15]*x2 + diag[21]*x3 + diag[27]*x4 + diag[33]*x5; 157 yy[6*i+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2 + diag[22]*x3 + diag[28]*x4 + diag[34]*x5; 158 yy[6*i+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2 + diag[23]*x3 + diag[29]*x4 + diag[35]*x5; 159 diag += 36; 160 } 161 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 162 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 163 ierr = PetscLogFlops(66.0*m);CHKERRQ(ierr); 164 PetscFunctionReturn(0); 165 } 166 static PetscErrorCode PCApply_PBJacobi_7(PC pc,Vec x,Vec y) 167 { 168 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 169 PetscErrorCode ierr; 170 PetscInt i,m = jac->mbs; 171 const MatScalar *diag = jac->diag; 172 PetscScalar x0,x1,x2,x3,x4,x5,x6,*yy; 173 const PetscScalar *xx; 174 175 PetscFunctionBegin; 176 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 177 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 178 for (i=0; i<m; i++) { 179 x0 = xx[7*i]; x1 = xx[7*i+1]; x2 = xx[7*i+2]; x3 = xx[7*i+3]; x4 = xx[7*i+4]; x5 = xx[7*i+5]; x6 = xx[7*i+6]; 180 181 yy[7*i] = diag[0]*x0 + diag[7]*x1 + diag[14]*x2 + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6; 182 yy[7*i+1] = diag[1]*x0 + diag[8]*x1 + diag[15]*x2 + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6; 183 yy[7*i+2] = diag[2]*x0 + diag[9]*x1 + diag[16]*x2 + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6; 184 yy[7*i+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2 + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6; 185 yy[7*i+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2 + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6; 186 yy[7*i+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2 + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6; 187 yy[7*i+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2 + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6; 188 diag += 49; 189 } 190 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 191 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 192 ierr = PetscLogFlops(91.0*m);CHKERRQ(ierr); /* 2*bs2 - bs */ 193 PetscFunctionReturn(0); 194 } 195 static PetscErrorCode PCApply_PBJacobi_N(PC pc,Vec x,Vec y) 196 { 197 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 198 PetscErrorCode ierr; 199 PetscInt i,ib,jb; 200 const PetscInt m = jac->mbs; 201 const PetscInt bs = jac->bs; 202 const MatScalar *diag = jac->diag; 203 PetscScalar *yy; 204 const PetscScalar *xx; 205 206 PetscFunctionBegin; 207 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 208 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 209 for (i=0; i<m; i++) { 210 for (ib=0; ib<bs; ib++){ 211 PetscScalar rowsum = 0; 212 for (jb=0; jb<bs; jb++){ 213 rowsum += diag[ib+jb*bs] * xx[bs*i+jb]; 214 } 215 yy[bs*i+ib] = rowsum; 216 } 217 diag += bs*bs; 218 } 219 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 220 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 221 ierr = PetscLogFlops((2.0*bs*bs-bs)*m);CHKERRQ(ierr); /* 2*bs2 - bs */ 222 PetscFunctionReturn(0); 223 } 224 225 static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc,Vec x,Vec y) 226 { 227 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 228 PetscErrorCode ierr; 229 PetscInt i,j,k,m = jac->mbs,bs=jac->bs; 230 const MatScalar *diag = jac->diag; 231 const PetscScalar *xx; 232 PetscScalar *yy; 233 234 PetscFunctionBegin; 235 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 236 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 237 for (i=0; i<m; i++) { 238 for (j=0; j<bs; j++) yy[i*bs+j] = 0.; 239 for (j=0; j<bs; j++) { 240 for (k=0; k<bs; k++) { 241 yy[i*bs+k] += diag[k*bs+j]*xx[i*bs+j]; 242 } 243 } 244 diag += bs*bs; 245 } 246 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 247 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 248 ierr = PetscLogFlops(m*bs*(2*bs-1));CHKERRQ(ierr); 249 PetscFunctionReturn(0); 250 } 251 252 /* -------------------------------------------------------------------------- */ 253 static PetscErrorCode PCSetUp_PBJacobi(PC pc) 254 { 255 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 256 PetscErrorCode ierr; 257 Mat A = pc->pmat; 258 MatFactorError err; 259 PetscInt nlocal; 260 261 PetscFunctionBegin; 262 ierr = MatInvertBlockDiagonal(A,&jac->diag);CHKERRQ(ierr); 263 ierr = MatFactorGetError(A,&err);CHKERRQ(ierr); 264 if (err) pc->failedreason = (PCFailedReason)err; 265 266 ierr = MatGetBlockSize(A,&jac->bs);CHKERRQ(ierr); 267 ierr = MatGetLocalSize(A,&nlocal,NULL);CHKERRQ(ierr); 268 jac->mbs = nlocal/jac->bs; 269 switch (jac->bs) { 270 case 1: 271 pc->ops->apply = PCApply_PBJacobi_1; 272 break; 273 case 2: 274 pc->ops->apply = PCApply_PBJacobi_2; 275 break; 276 case 3: 277 pc->ops->apply = PCApply_PBJacobi_3; 278 break; 279 case 4: 280 pc->ops->apply = PCApply_PBJacobi_4; 281 break; 282 case 5: 283 pc->ops->apply = PCApply_PBJacobi_5; 284 break; 285 case 6: 286 pc->ops->apply = PCApply_PBJacobi_6; 287 break; 288 case 7: 289 pc->ops->apply = PCApply_PBJacobi_7; 290 break; 291 default: 292 pc->ops->apply = PCApply_PBJacobi_N; 293 break; 294 } 295 pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N; 296 PetscFunctionReturn(0); 297 } 298 /* -------------------------------------------------------------------------- */ 299 static PetscErrorCode PCDestroy_PBJacobi(PC pc) 300 { 301 PetscErrorCode ierr; 302 303 PetscFunctionBegin; 304 /* 305 Free the private data structure that was hanging off the PC 306 */ 307 ierr = PetscFree(pc->data);CHKERRQ(ierr); 308 PetscFunctionReturn(0); 309 } 310 311 static PetscErrorCode PCView_PBJacobi(PC pc,PetscViewer viewer) 312 { 313 PetscErrorCode ierr; 314 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 315 PetscBool iascii; 316 317 PetscFunctionBegin; 318 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 319 if (iascii) { 320 ierr = PetscViewerASCIIPrintf(viewer," point-block size %D\n",jac->bs);CHKERRQ(ierr); 321 } 322 PetscFunctionReturn(0); 323 } 324 325 /* -------------------------------------------------------------------------- */ 326 /*MC 327 PCPBJACOBI - Point block Jacobi preconditioner 328 329 330 Notes: 331 See PCJACOBI for point Jacobi preconditioning 332 333 This works for AIJ and BAIJ matrices and uses the blocksize provided to the matrix 334 335 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 336 is detected a PETSc error is generated. 337 338 Developer Notes: 339 This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow 340 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 341 terminating KSP with a KSP_DIVERGED_NANORIF allowing 342 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 343 344 Developer Note: Perhaps should provide an option that allows generation of a valid preconditioner 345 even if a block is singular as the PCJACOBI does. 346 347 Level: beginner 348 349 350 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI 351 352 M*/ 353 354 PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc) 355 { 356 PC_PBJacobi *jac; 357 PetscErrorCode ierr; 358 359 PetscFunctionBegin; 360 /* 361 Creates the private data structure for this preconditioner and 362 attach it to the PC object. 363 */ 364 ierr = PetscNewLog(pc,&jac);CHKERRQ(ierr); 365 pc->data = (void*)jac; 366 367 /* 368 Initialize the pointers to vectors to ZERO; these will be used to store 369 diagonal entries of the matrix for fast preconditioner application. 370 */ 371 jac->diag = NULL; 372 373 /* 374 Set the pointers for the functions that are provided above. 375 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 376 are called, they will automatically call these functions. Note we 377 choose not to provide a couple of these functions since they are 378 not needed. 379 */ 380 pc->ops->apply = NULL; /*set depending on the block size */ 381 pc->ops->applytranspose = NULL; 382 pc->ops->setup = PCSetUp_PBJacobi; 383 pc->ops->destroy = PCDestroy_PBJacobi; 384 pc->ops->setfromoptions = NULL; 385 pc->ops->view = PCView_PBJacobi; 386 pc->ops->applyrichardson = NULL; 387 pc->ops->applysymmetricleft = NULL; 388 pc->ops->applysymmetricright = NULL; 389 PetscFunctionReturn(0); 390 } 391 392 393