1 2 /* 3 Include files needed for the variable size block PBJacobi preconditioner: 4 pcimpl.h - private include file intended for use by all preconditioners 5 */ 6 7 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 8 9 /* 10 Private context (data structure) for the VPBJacobi preconditioner. 11 */ 12 typedef struct { 13 MatScalar *diag; 14 } PC_VPBJacobi; 15 16 static PetscErrorCode PCApply_VPBJacobi(PC pc,Vec x,Vec y) 17 { 18 PC_VPBJacobi *jac = (PC_VPBJacobi*)pc->data; 19 PetscErrorCode ierr; 20 PetscInt i,ncnt = 0; 21 const MatScalar *diag = jac->diag; 22 PetscInt ib,jb,bs; 23 const PetscScalar *xx; 24 PetscScalar *yy,x0,x1,x2,x3,x4,x5,x6; 25 PetscInt nblocks; 26 const PetscInt *bsizes; 27 28 PetscFunctionBegin; 29 ierr = MatGetVariableBlockSizes(pc->pmat,&nblocks,&bsizes);CHKERRQ(ierr); 30 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 31 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 32 for (i=0; i<nblocks; i++) { 33 bs = bsizes[i]; 34 switch (bs) { 35 case 1: 36 yy[ncnt] = *diag*xx[ncnt]; 37 break; 38 case 2: 39 x0 = xx[ncnt]; x1 = xx[ncnt+1]; 40 yy[ncnt] = diag[0]*x0 + diag[2]*x1; 41 yy[ncnt+1] = diag[1]*x0 + diag[3]*x1; 42 break; 43 case 3: 44 x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; 45 yy[ncnt] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 46 yy[ncnt+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 47 yy[ncnt+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 48 break; 49 case 4: 50 x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; 51 yy[ncnt] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 52 yy[ncnt+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 53 yy[ncnt+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3; 54 yy[ncnt+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3; 55 break; 56 case 5: 57 x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; 58 yy[ncnt] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 59 yy[ncnt+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 60 yy[ncnt+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 61 yy[ncnt+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 62 yy[ncnt+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 63 break; 64 case 6: 65 x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; x5 = xx[ncnt+5]; 66 yy[ncnt] = diag[0]*x0 + diag[6]*x1 + diag[12]*x2 + diag[18]*x3 + diag[24]*x4 + diag[30]*x5; 67 yy[ncnt+1] = diag[1]*x0 + diag[7]*x1 + diag[13]*x2 + diag[19]*x3 + diag[25]*x4 + diag[31]*x5; 68 yy[ncnt+2] = diag[2]*x0 + diag[8]*x1 + diag[14]*x2 + diag[20]*x3 + diag[26]*x4 + diag[32]*x5; 69 yy[ncnt+3] = diag[3]*x0 + diag[9]*x1 + diag[15]*x2 + diag[21]*x3 + diag[27]*x4 + diag[33]*x5; 70 yy[ncnt+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2 + diag[22]*x3 + diag[28]*x4 + diag[34]*x5; 71 yy[ncnt+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2 + diag[23]*x3 + diag[29]*x4 + diag[35]*x5; 72 break; 73 case 7: 74 x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; x5 = xx[ncnt+5]; x6 = xx[ncnt+6]; 75 yy[ncnt] = diag[0]*x0 + diag[7]*x1 + diag[14]*x2 + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6; 76 yy[ncnt+1] = diag[1]*x0 + diag[8]*x1 + diag[15]*x2 + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6; 77 yy[ncnt+2] = diag[2]*x0 + diag[9]*x1 + diag[16]*x2 + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6; 78 yy[ncnt+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2 + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6; 79 yy[ncnt+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2 + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6; 80 yy[ncnt+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2 + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6; 81 yy[ncnt+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2 + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6; 82 break; 83 default: 84 for (ib=0; ib<bs; ib++) { 85 PetscScalar rowsum = 0; 86 for (jb=0; jb<bs; jb++) { 87 rowsum += diag[ib+jb*bs] * xx[ncnt+jb]; 88 } 89 yy[ncnt+ib] = rowsum; 90 } 91 } 92 ncnt += bsizes[i]; 93 diag += bsizes[i]*bsizes[i]; 94 } 95 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 96 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 100 /* -------------------------------------------------------------------------- */ 101 static PetscErrorCode PCSetUp_VPBJacobi(PC pc) 102 { 103 PC_VPBJacobi *jac = (PC_VPBJacobi*)pc->data; 104 PetscErrorCode ierr; 105 Mat A = pc->pmat; 106 MatFactorError err; 107 PetscInt i,nsize = 0,nlocal; 108 PetscInt nblocks; 109 const PetscInt *bsizes; 110 111 PetscFunctionBegin; 112 ierr = MatGetVariableBlockSizes(pc->pmat,&nblocks,&bsizes);CHKERRQ(ierr); 113 ierr = MatGetLocalSize(pc->pmat,&nlocal,NULL);CHKERRQ(ierr); 114 if (nlocal && !nblocks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatSetVariableBlockSizes() before using PCVPBJACOBI"); 115 if (!jac->diag) { 116 for (i=0; i<nblocks; i++) nsize += bsizes[i]*bsizes[i]; 117 ierr = PetscMalloc1(nsize,&jac->diag);CHKERRQ(ierr); 118 } 119 ierr = MatInvertVariableBlockDiagonal(A,nblocks,bsizes,jac->diag);CHKERRQ(ierr); 120 ierr = MatFactorGetError(A,&err);CHKERRQ(ierr); 121 if (err) pc->failedreason = (PCFailedReason)err; 122 pc->ops->apply = PCApply_VPBJacobi; 123 PetscFunctionReturn(0); 124 } 125 /* -------------------------------------------------------------------------- */ 126 static PetscErrorCode PCDestroy_VPBJacobi(PC pc) 127 { 128 PC_VPBJacobi *jac = (PC_VPBJacobi*)pc->data; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 /* 133 Free the private data structure that was hanging off the PC 134 */ 135 ierr = PetscFree(jac->diag);CHKERRQ(ierr); 136 ierr = PetscFree(pc->data);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 /* -------------------------------------------------------------------------- */ 141 /*MC 142 PCVPBJACOBI - Variable size point block Jacobi preconditioner 143 144 Notes: 145 See PCJACOBI for point Jacobi preconditioning, PCPBJACOBI for fixed point block size, and PCBJACOBI for large size blocks 146 147 This works for AIJ matrices 148 149 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 150 is detected a PETSc error is generated. 151 152 One must call MatSetVariableBlockSizes() to use this preconditioner 153 154 Developer Notes: 155 This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow 156 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 157 terminating KSP with a KSP_DIVERGED_NANORIF allowing 158 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 159 160 Perhaps should provide an option that allows generation of a valid preconditioner 161 even if a block is singular as the PCJACOBI does. 162 163 Level: beginner 164 165 .seealso: MatSetVariableBlockSizes(), PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI, PCPBJACOBI, PCBJACOBI 166 167 M*/ 168 169 PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc) 170 { 171 PC_VPBJacobi *jac; 172 PetscErrorCode ierr; 173 174 PetscFunctionBegin; 175 /* 176 Creates the private data structure for this preconditioner and 177 attach it to the PC object. 178 */ 179 ierr = PetscNewLog(pc,&jac);CHKERRQ(ierr); 180 pc->data = (void*)jac; 181 182 /* 183 Initialize the pointers to vectors to ZERO; these will be used to store 184 diagonal entries of the matrix for fast preconditioner application. 185 */ 186 jac->diag = NULL; 187 188 /* 189 Set the pointers for the functions that are provided above. 190 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 191 are called, they will automatically call these functions. Note we 192 choose not to provide a couple of these functions since they are 193 not needed. 194 */ 195 pc->ops->apply = PCApply_VPBJacobi; 196 pc->ops->applytranspose = NULL; 197 pc->ops->setup = PCSetUp_VPBJacobi; 198 pc->ops->destroy = PCDestroy_VPBJacobi; 199 pc->ops->setfromoptions = NULL; 200 pc->ops->applyrichardson = NULL; 201 pc->ops->applysymmetricleft = NULL; 202 pc->ops->applysymmetricright = NULL; 203 PetscFunctionReturn(0); 204 } 205 206