1 2 /* 3 Include files needed for the PBJacobi preconditioner: 4 pcimpl.h - private include file intended for use by all preconditioners 5 */ 6 7 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 8 9 /* 10 Private context (data structure) for the PBJacobi preconditioner. 11 */ 12 typedef struct { 13 const MatScalar *diag; 14 PetscInt bs,mbs; 15 } PC_PBJacobi; 16 17 18 static PetscErrorCode PCApply_PBJacobi_1(PC pc,Vec x,Vec y) 19 { 20 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 21 PetscErrorCode ierr; 22 PetscInt i,m = jac->mbs; 23 const MatScalar *diag = jac->diag; 24 const PetscScalar *xx; 25 PetscScalar *yy; 26 27 PetscFunctionBegin; 28 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 29 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 30 for (i=0; i<m; i++) yy[i] = diag[i]*xx[i]; 31 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 32 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 33 ierr = PetscLogFlops(m);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 37 static PetscErrorCode PCApply_PBJacobi_2(PC pc,Vec x,Vec y) 38 { 39 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 40 PetscErrorCode ierr; 41 PetscInt i,m = jac->mbs; 42 const MatScalar *diag = jac->diag; 43 PetscScalar x0,x1,*yy; 44 const PetscScalar *xx; 45 46 PetscFunctionBegin; 47 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 48 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 49 for (i=0; i<m; i++) { 50 x0 = xx[2*i]; x1 = xx[2*i+1]; 51 yy[2*i] = diag[0]*x0 + diag[2]*x1; 52 yy[2*i+1] = diag[1]*x0 + diag[3]*x1; 53 diag += 4; 54 } 55 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 56 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 57 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 static PetscErrorCode PCApply_PBJacobi_3(PC pc,Vec x,Vec y) 61 { 62 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 63 PetscErrorCode ierr; 64 PetscInt i,m = jac->mbs; 65 const MatScalar *diag = jac->diag; 66 PetscScalar x0,x1,x2,*yy; 67 const PetscScalar *xx; 68 69 PetscFunctionBegin; 70 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 71 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 72 for (i=0; i<m; i++) { 73 x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2]; 74 75 yy[3*i] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 76 yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 77 yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 78 diag += 9; 79 } 80 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 81 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 82 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 static PetscErrorCode PCApply_PBJacobi_4(PC pc,Vec x,Vec y) 86 { 87 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 88 PetscErrorCode ierr; 89 PetscInt i,m = jac->mbs; 90 const MatScalar *diag = jac->diag; 91 PetscScalar x0,x1,x2,x3,*yy; 92 const PetscScalar *xx; 93 94 PetscFunctionBegin; 95 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 96 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 97 for (i=0; i<m; i++) { 98 x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3]; 99 100 yy[4*i] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 101 yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 102 yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3; 103 yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3; 104 diag += 16; 105 } 106 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 107 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 108 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 109 PetscFunctionReturn(0); 110 } 111 static PetscErrorCode PCApply_PBJacobi_5(PC pc,Vec x,Vec y) 112 { 113 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 114 PetscErrorCode ierr; 115 PetscInt i,m = jac->mbs; 116 const MatScalar *diag = jac->diag; 117 PetscScalar x0,x1,x2,x3,x4,*yy; 118 const PetscScalar *xx; 119 120 PetscFunctionBegin; 121 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 122 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 123 for (i=0; i<m; i++) { 124 x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4]; 125 126 yy[5*i] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 127 yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 128 yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 129 yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 130 yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 131 diag += 25; 132 } 133 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 134 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 135 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 136 PetscFunctionReturn(0); 137 } 138 static PetscErrorCode PCApply_PBJacobi_6(PC pc,Vec x,Vec y) 139 { 140 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 141 PetscErrorCode ierr; 142 PetscInt i,m = jac->mbs; 143 const MatScalar *diag = jac->diag; 144 PetscScalar x0,x1,x2,x3,x4,x5,*yy; 145 const PetscScalar *xx; 146 147 PetscFunctionBegin; 148 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 149 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 150 for (i=0; i<m; i++) { 151 x0 = xx[6*i]; x1 = xx[6*i+1]; x2 = xx[6*i+2]; x3 = xx[6*i+3]; x4 = xx[6*i+4]; x5 = xx[6*i+5]; 152 153 yy[6*i] = diag[0]*x0 + diag[6]*x1 + diag[12]*x2 + diag[18]*x3 + diag[24]*x4 + diag[30]*x5; 154 yy[6*i+1] = diag[1]*x0 + diag[7]*x1 + diag[13]*x2 + diag[19]*x3 + diag[25]*x4 + diag[31]*x5; 155 yy[6*i+2] = diag[2]*x0 + diag[8]*x1 + diag[14]*x2 + diag[20]*x3 + diag[26]*x4 + diag[32]*x5; 156 yy[6*i+3] = diag[3]*x0 + diag[9]*x1 + diag[15]*x2 + diag[21]*x3 + diag[27]*x4 + diag[33]*x5; 157 yy[6*i+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2 + diag[22]*x3 + diag[28]*x4 + diag[34]*x5; 158 yy[6*i+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2 + diag[23]*x3 + diag[29]*x4 + diag[35]*x5; 159 diag += 36; 160 } 161 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 162 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 163 ierr = PetscLogFlops(66.0*m);CHKERRQ(ierr); 164 PetscFunctionReturn(0); 165 } 166 static PetscErrorCode PCApply_PBJacobi_7(PC pc,Vec x,Vec y) 167 { 168 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 169 PetscErrorCode ierr; 170 PetscInt i,m = jac->mbs; 171 const MatScalar *diag = jac->diag; 172 PetscScalar x0,x1,x2,x3,x4,x5,x6,*yy; 173 const PetscScalar *xx; 174 175 PetscFunctionBegin; 176 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 177 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 178 for (i=0; i<m; i++) { 179 x0 = xx[7*i]; x1 = xx[7*i+1]; x2 = xx[7*i+2]; x3 = xx[7*i+3]; x4 = xx[7*i+4]; x5 = xx[7*i+5]; x6 = xx[7*i+6]; 180 181 yy[7*i] = diag[0]*x0 + diag[7]*x1 + diag[14]*x2 + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6; 182 yy[7*i+1] = diag[1]*x0 + diag[8]*x1 + diag[15]*x2 + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6; 183 yy[7*i+2] = diag[2]*x0 + diag[9]*x1 + diag[16]*x2 + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6; 184 yy[7*i+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2 + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6; 185 yy[7*i+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2 + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6; 186 yy[7*i+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2 + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6; 187 yy[7*i+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2 + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6; 188 diag += 49; 189 } 190 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 191 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 192 ierr = PetscLogFlops(91*m);CHKERRQ(ierr); /* 2*bs2 - bs */ 193 PetscFunctionReturn(0); 194 } 195 /* -------------------------------------------------------------------------- */ 196 static PetscErrorCode PCSetUp_PBJacobi(PC pc) 197 { 198 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 199 PetscErrorCode ierr; 200 Mat A = pc->pmat; 201 MatFactorError err; 202 PetscInt nlocal; 203 204 PetscFunctionBegin; 205 ierr = MatInvertBlockDiagonal(A,&jac->diag);CHKERRQ(ierr); 206 ierr = MatFactorGetError(A,&err);CHKERRQ(ierr); 207 if (err) pc->failedreason = (PCFailedReason)err; 208 209 ierr = MatGetBlockSize(A,&jac->bs);CHKERRQ(ierr); 210 ierr = MatGetLocalSize(A,&nlocal,NULL);CHKERRQ(ierr); 211 jac->mbs = nlocal/jac->bs; 212 switch (jac->bs) { 213 case 1: 214 pc->ops->apply = PCApply_PBJacobi_1; 215 break; 216 case 2: 217 pc->ops->apply = PCApply_PBJacobi_2; 218 break; 219 case 3: 220 pc->ops->apply = PCApply_PBJacobi_3; 221 break; 222 case 4: 223 pc->ops->apply = PCApply_PBJacobi_4; 224 break; 225 case 5: 226 pc->ops->apply = PCApply_PBJacobi_5; 227 break; 228 case 6: 229 pc->ops->apply = PCApply_PBJacobi_6; 230 break; 231 case 7: 232 pc->ops->apply = PCApply_PBJacobi_7; 233 break; 234 default: 235 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"not supported for block size %D",jac->bs); 236 } 237 PetscFunctionReturn(0); 238 } 239 /* -------------------------------------------------------------------------- */ 240 static PetscErrorCode PCDestroy_PBJacobi(PC pc) 241 { 242 PetscErrorCode ierr; 243 244 PetscFunctionBegin; 245 /* 246 Free the private data structure that was hanging off the PC 247 */ 248 ierr = PetscFree(pc->data);CHKERRQ(ierr); 249 PetscFunctionReturn(0); 250 } 251 252 static PetscErrorCode PCView_PBJacobi(PC pc,PetscViewer viewer) 253 { 254 PetscErrorCode ierr; 255 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 256 PetscBool iascii; 257 258 PetscFunctionBegin; 259 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 260 if (iascii) { 261 ierr = PetscViewerASCIIPrintf(viewer," point-block size %D\n",jac->bs);CHKERRQ(ierr); 262 } 263 PetscFunctionReturn(0); 264 } 265 266 /* -------------------------------------------------------------------------- */ 267 /*MC 268 PCPBJACOBI - Point block Jacobi preconditioner 269 270 271 Notes: See PCJACOBI for point Jacobi preconditioning 272 273 This works for AIJ and BAIJ matrices and uses the blocksize provided to the matrix 274 275 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 276 is detected a PETSc error is generated. 277 278 Developer Notes: This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow 279 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 280 terminating KSP with a KSP_DIVERGED_NANORIF allowing 281 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 282 283 Developer Note: Perhaps should provide an option that allows generation of a valid preconditioner 284 even if a block is singular as the PCJACOBI does. 285 286 Level: beginner 287 288 Concepts: point block Jacobi 289 290 291 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI 292 293 M*/ 294 295 PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc) 296 { 297 PC_PBJacobi *jac; 298 PetscErrorCode ierr; 299 300 PetscFunctionBegin; 301 /* 302 Creates the private data structure for this preconditioner and 303 attach it to the PC object. 304 */ 305 ierr = PetscNewLog(pc,&jac);CHKERRQ(ierr); 306 pc->data = (void*)jac; 307 308 /* 309 Initialize the pointers to vectors to ZERO; these will be used to store 310 diagonal entries of the matrix for fast preconditioner application. 311 */ 312 jac->diag = 0; 313 314 /* 315 Set the pointers for the functions that are provided above. 316 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 317 are called, they will automatically call these functions. Note we 318 choose not to provide a couple of these functions since they are 319 not needed. 320 */ 321 pc->ops->apply = 0; /*set depending on the block size */ 322 pc->ops->applytranspose = 0; 323 pc->ops->setup = PCSetUp_PBJacobi; 324 pc->ops->destroy = PCDestroy_PBJacobi; 325 pc->ops->setfromoptions = 0; 326 pc->ops->view = PCView_PBJacobi; 327 pc->ops->applyrichardson = 0; 328 pc->ops->applysymmetricleft = 0; 329 pc->ops->applysymmetricright = 0; 330 PetscFunctionReturn(0); 331 } 332 333 334