1 2 /* 3 Include files needed for the PBJacobi preconditioner: 4 pcimpl.h - private include file intended for use by all preconditioners 5 */ 6 7 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 8 9 /* 10 Private context (data structure) for the PBJacobi preconditioner. 11 */ 12 typedef struct { 13 const MatScalar *diag; 14 PetscInt bs,mbs; 15 } PC_PBJacobi; 16 17 18 static PetscErrorCode PCApply_PBJacobi_1(PC pc,Vec x,Vec y) 19 { 20 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 21 PetscErrorCode ierr; 22 PetscInt i,m = jac->mbs; 23 const MatScalar *diag = jac->diag; 24 const PetscScalar *xx; 25 PetscScalar *yy; 26 27 PetscFunctionBegin; 28 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 29 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 30 for (i=0; i<m; i++) yy[i] = diag[i]*xx[i]; 31 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 32 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 33 ierr = PetscLogFlops(m);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 37 static PetscErrorCode PCApply_PBJacobi_2(PC pc,Vec x,Vec y) 38 { 39 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 40 PetscErrorCode ierr; 41 PetscInt i,m = jac->mbs; 42 const MatScalar *diag = jac->diag; 43 PetscScalar x0,x1,*yy; 44 const PetscScalar *xx; 45 46 PetscFunctionBegin; 47 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 48 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 49 for (i=0; i<m; i++) { 50 x0 = xx[2*i]; x1 = xx[2*i+1]; 51 yy[2*i] = diag[0]*x0 + diag[2]*x1; 52 yy[2*i+1] = diag[1]*x0 + diag[3]*x1; 53 diag += 4; 54 } 55 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 56 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 57 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 static PetscErrorCode PCApply_PBJacobi_3(PC pc,Vec x,Vec y) 61 { 62 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 63 PetscErrorCode ierr; 64 PetscInt i,m = jac->mbs; 65 const MatScalar *diag = jac->diag; 66 PetscScalar x0,x1,x2,*yy; 67 const PetscScalar *xx; 68 69 PetscFunctionBegin; 70 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 71 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 72 for (i=0; i<m; i++) { 73 x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2]; 74 75 yy[3*i] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 76 yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 77 yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 78 diag += 9; 79 } 80 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 81 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 82 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 static PetscErrorCode PCApply_PBJacobi_4(PC pc,Vec x,Vec y) 86 { 87 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 88 PetscErrorCode ierr; 89 PetscInt i,m = jac->mbs; 90 const MatScalar *diag = jac->diag; 91 PetscScalar x0,x1,x2,x3,*yy; 92 const PetscScalar *xx; 93 94 PetscFunctionBegin; 95 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 96 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 97 for (i=0; i<m; i++) { 98 x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3]; 99 100 yy[4*i] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 101 yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 102 yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3; 103 yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3; 104 diag += 16; 105 } 106 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 107 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 108 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 109 PetscFunctionReturn(0); 110 } 111 static PetscErrorCode PCApply_PBJacobi_5(PC pc,Vec x,Vec y) 112 { 113 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 114 PetscErrorCode ierr; 115 PetscInt i,m = jac->mbs; 116 const MatScalar *diag = jac->diag; 117 PetscScalar x0,x1,x2,x3,x4,*yy; 118 const PetscScalar *xx; 119 120 PetscFunctionBegin; 121 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 122 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 123 for (i=0; i<m; i++) { 124 x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4]; 125 126 yy[5*i] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 127 yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 128 yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 129 yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 130 yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 131 diag += 25; 132 } 133 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 134 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 135 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 136 PetscFunctionReturn(0); 137 } 138 static PetscErrorCode PCApply_PBJacobi_6(PC pc,Vec x,Vec y) 139 { 140 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 141 PetscErrorCode ierr; 142 PetscInt i,m = jac->mbs; 143 const MatScalar *diag = jac->diag; 144 PetscScalar x0,x1,x2,x3,x4,x5,*yy; 145 const PetscScalar *xx; 146 147 PetscFunctionBegin; 148 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 149 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 150 for (i=0; i<m; i++) { 151 x0 = xx[6*i]; x1 = xx[6*i+1]; x2 = xx[6*i+2]; x3 = xx[6*i+3]; x4 = xx[6*i+4]; x5 = xx[6*i+5]; 152 153 yy[6*i] = diag[0]*x0 + diag[6]*x1 + diag[12]*x2 + diag[18]*x3 + diag[24]*x4 + diag[30]*x5; 154 yy[6*i+1] = diag[1]*x0 + diag[7]*x1 + diag[13]*x2 + diag[19]*x3 + diag[25]*x4 + diag[31]*x5; 155 yy[6*i+2] = diag[2]*x0 + diag[8]*x1 + diag[14]*x2 + diag[20]*x3 + diag[26]*x4 + diag[32]*x5; 156 yy[6*i+3] = diag[3]*x0 + diag[9]*x1 + diag[15]*x2 + diag[21]*x3 + diag[27]*x4 + diag[33]*x5; 157 yy[6*i+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2 + diag[22]*x3 + diag[28]*x4 + diag[34]*x5; 158 yy[6*i+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2 + diag[23]*x3 + diag[29]*x4 + diag[35]*x5; 159 diag += 36; 160 } 161 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 162 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 163 ierr = PetscLogFlops(66.0*m);CHKERRQ(ierr); 164 PetscFunctionReturn(0); 165 } 166 static PetscErrorCode PCApply_PBJacobi_7(PC pc,Vec x,Vec y) 167 { 168 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 169 PetscErrorCode ierr; 170 PetscInt i,m = jac->mbs; 171 const MatScalar *diag = jac->diag; 172 PetscScalar x0,x1,x2,x3,x4,x5,x6,*yy; 173 const PetscScalar *xx; 174 175 PetscFunctionBegin; 176 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 177 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 178 for (i=0; i<m; i++) { 179 x0 = xx[7*i]; x1 = xx[7*i+1]; x2 = xx[7*i+2]; x3 = xx[7*i+3]; x4 = xx[7*i+4]; x5 = xx[7*i+5]; x6 = xx[7*i+6]; 180 181 yy[7*i] = diag[0]*x0 + diag[7]*x1 + diag[14]*x2 + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6; 182 yy[7*i+1] = diag[1]*x0 + diag[8]*x1 + diag[15]*x2 + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6; 183 yy[7*i+2] = diag[2]*x0 + diag[9]*x1 + diag[16]*x2 + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6; 184 yy[7*i+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2 + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6; 185 yy[7*i+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2 + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6; 186 yy[7*i+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2 + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6; 187 yy[7*i+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2 + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6; 188 diag += 49; 189 } 190 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 191 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 192 ierr = PetscLogFlops(91*m);CHKERRQ(ierr); /* 2*bs2 - bs */ 193 PetscFunctionReturn(0); 194 } 195 static PetscErrorCode PCApply_PBJacobi_N(PC pc,Vec x,Vec y) 196 { 197 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 198 PetscErrorCode ierr; 199 PetscInt i,ib,jb; 200 const PetscInt m = jac->mbs; 201 const PetscInt bs = jac->bs; 202 const MatScalar *diag = jac->diag; 203 PetscScalar *yy; 204 const PetscScalar *xx; 205 206 PetscFunctionBegin; 207 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 208 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 209 for (i=0; i<m; i++) { 210 for (ib=0; ib<bs; ib++){ 211 PetscScalar rowsum = 0; 212 for (jb=0; jb<bs; jb++){ 213 rowsum += diag[ib+jb*bs] * xx[bs*i+jb]; 214 } 215 yy[bs*i+ib] = rowsum; 216 } 217 diag += bs*bs; 218 } 219 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 220 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 221 ierr = PetscLogFlops(2*bs*bs-bs);CHKERRQ(ierr); /* 2*bs2 - bs */ 222 PetscFunctionReturn(0); 223 } 224 /* -------------------------------------------------------------------------- */ 225 static PetscErrorCode PCSetUp_PBJacobi(PC pc) 226 { 227 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 228 PetscErrorCode ierr; 229 Mat A = pc->pmat; 230 MatFactorError err; 231 PetscInt nlocal; 232 233 PetscFunctionBegin; 234 ierr = MatInvertBlockDiagonal(A,&jac->diag);CHKERRQ(ierr); 235 ierr = MatFactorGetError(A,&err);CHKERRQ(ierr); 236 if (err) pc->failedreason = (PCFailedReason)err; 237 238 ierr = MatGetBlockSize(A,&jac->bs);CHKERRQ(ierr); 239 ierr = MatGetLocalSize(A,&nlocal,NULL);CHKERRQ(ierr); 240 jac->mbs = nlocal/jac->bs; 241 switch (jac->bs) { 242 case 1: 243 pc->ops->apply = PCApply_PBJacobi_1; 244 break; 245 case 2: 246 pc->ops->apply = PCApply_PBJacobi_2; 247 break; 248 case 3: 249 pc->ops->apply = PCApply_PBJacobi_3; 250 break; 251 case 4: 252 pc->ops->apply = PCApply_PBJacobi_4; 253 break; 254 case 5: 255 pc->ops->apply = PCApply_PBJacobi_5; 256 break; 257 case 6: 258 pc->ops->apply = PCApply_PBJacobi_6; 259 break; 260 case 7: 261 pc->ops->apply = PCApply_PBJacobi_7; 262 break; 263 default: 264 pc->ops->apply = PCApply_PBJacobi_N; 265 break; 266 } 267 PetscFunctionReturn(0); 268 } 269 /* -------------------------------------------------------------------------- */ 270 static PetscErrorCode PCDestroy_PBJacobi(PC pc) 271 { 272 PetscErrorCode ierr; 273 274 PetscFunctionBegin; 275 /* 276 Free the private data structure that was hanging off the PC 277 */ 278 ierr = PetscFree(pc->data);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 static PetscErrorCode PCView_PBJacobi(PC pc,PetscViewer viewer) 283 { 284 PetscErrorCode ierr; 285 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 286 PetscBool iascii; 287 288 PetscFunctionBegin; 289 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 290 if (iascii) { 291 ierr = PetscViewerASCIIPrintf(viewer," point-block size %D\n",jac->bs);CHKERRQ(ierr); 292 } 293 PetscFunctionReturn(0); 294 } 295 296 /* -------------------------------------------------------------------------- */ 297 /*MC 298 PCPBJACOBI - Point block Jacobi preconditioner 299 300 301 Notes: See PCJACOBI for point Jacobi preconditioning 302 303 This works for AIJ and BAIJ matrices and uses the blocksize provided to the matrix 304 305 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 306 is detected a PETSc error is generated. 307 308 Developer Notes: This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow 309 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 310 terminating KSP with a KSP_DIVERGED_NANORIF allowing 311 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 312 313 Developer Note: Perhaps should provide an option that allows generation of a valid preconditioner 314 even if a block is singular as the PCJACOBI does. 315 316 Level: beginner 317 318 Concepts: point block Jacobi 319 320 321 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI 322 323 M*/ 324 325 PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc) 326 { 327 PC_PBJacobi *jac; 328 PetscErrorCode ierr; 329 330 PetscFunctionBegin; 331 /* 332 Creates the private data structure for this preconditioner and 333 attach it to the PC object. 334 */ 335 ierr = PetscNewLog(pc,&jac);CHKERRQ(ierr); 336 pc->data = (void*)jac; 337 338 /* 339 Initialize the pointers to vectors to ZERO; these will be used to store 340 diagonal entries of the matrix for fast preconditioner application. 341 */ 342 jac->diag = 0; 343 344 /* 345 Set the pointers for the functions that are provided above. 346 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 347 are called, they will automatically call these functions. Note we 348 choose not to provide a couple of these functions since they are 349 not needed. 350 */ 351 pc->ops->apply = 0; /*set depending on the block size */ 352 pc->ops->applytranspose = 0; 353 pc->ops->setup = PCSetUp_PBJacobi; 354 pc->ops->destroy = PCDestroy_PBJacobi; 355 pc->ops->setfromoptions = 0; 356 pc->ops->view = PCView_PBJacobi; 357 pc->ops->applyrichardson = 0; 358 pc->ops->applysymmetricleft = 0; 359 pc->ops->applysymmetricright = 0; 360 PetscFunctionReturn(0); 361 } 362 363 364