1 2 /* 3 This is included by sbaij.c to generate unsigned short and regular versions of these two functions 4 */ 5 #undef USHORT 6 #if defined(USESHORT) 7 #define USHORT _ushort 8 #else 9 #define USHORT 10 #endif 11 #define PETSCMAP1_a(a,b) a ## b 12 #define PETSCMAP1_b(a,b) PETSCMAP1_a(a,b) 13 #define PETSCMAP1(a) PETSCMAP1_b(a,USHORT) 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMult_SeqSBAIJ_1_Hermitian" 17 PetscErrorCode PETSCMAP1(MatMult_SeqSBAIJ_1_Hermitian)(Mat A,Vec xx,Vec zz) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 const PetscScalar *x; 21 PetscScalar *z,x1,sum; 22 const MatScalar *v; 23 MatScalar vj; 24 PetscErrorCode ierr; 25 PetscInt mbs=a->mbs,i,j,nz; 26 const PetscInt *ai=a->i; 27 #if defined(USESHORT) 28 const unsigned short *ib=a->jshort; 29 unsigned short ibt; 30 #else 31 const PetscInt *ib=a->j; 32 PetscInt ibt; 33 #endif 34 35 PetscFunctionBegin; 36 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 37 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 38 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 39 40 v = a->a; 41 for (i=0; i<mbs; i++) { 42 nz = ai[i+1] - ai[i]; /* length of i_th row of A */ 43 x1 = x[i]; 44 sum = v[0]*x1; /* diagonal term */ 45 for (j=1; j<nz; j++) { 46 ibt = ib[j]; 47 vj = v[j]; 48 sum += vj * x[ibt]; /* (strict upper triangular part of A)*x */ 49 z[ibt] += PetscConj(v[j]) * x1; /* (strict lower triangular part of A)*x */ 50 } 51 z[i] += sum; 52 v += nz; 53 ib += nz; 54 } 55 56 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 57 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 58 ierr = PetscLogFlops(2.0*(2.0*a->nz - mbs) - mbs);CHKERRQ(ierr); 59 PetscFunctionReturn(0); 60 } 61 62 #undef __FUNCT__ 63 #define __FUNCT__ "MatMult_SeqSBAIJ_1" 64 PetscErrorCode PETSCMAP1(MatMult_SeqSBAIJ_1)(Mat A,Vec xx,Vec zz) 65 { 66 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 67 const PetscScalar *x; 68 PetscScalar *z,x1,sum; 69 const MatScalar *v; 70 MatScalar vj; 71 PetscErrorCode ierr; 72 PetscInt mbs=a->mbs,i,j,nz; 73 const PetscInt *ai=a->i; 74 #if defined(USESHORT) 75 const unsigned short *ib=a->jshort; 76 unsigned short ibt; 77 #else 78 const PetscInt *ib=a->j; 79 PetscInt ibt; 80 #endif 81 82 PetscFunctionBegin; 83 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 84 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 85 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 86 87 v = a->a; 88 for (i=0; i<mbs; i++) { 89 nz = ai[i+1] - ai[i]; /* length of i_th row of A */ 90 x1 = x[i]; 91 sum = v[0]*x1; /* diagonal term */ 92 for (j=1; j<nz; j++) { 93 ibt = ib[j]; 94 vj = v[j]; 95 z[ibt] += vj * x1; /* (strict lower triangular part of A)*x */ 96 sum += vj * x[ibt]; /* (strict upper triangular part of A)*x */ 97 } 98 z[i] += sum; 99 v += nz; 100 ib += nz; 101 } 102 103 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 104 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 105 ierr = PetscLogFlops(2.0*(2.0*a->nz - mbs) - mbs);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "MatSOR_SeqSBAIJ" 111 PetscErrorCode PETSCMAP1(MatSOR_SeqSBAIJ)(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 112 { 113 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 114 const MatScalar *aa=a->a,*v,*v1,*aidiag; 115 PetscScalar *x,*t,sum; 116 const PetscScalar *b; 117 MatScalar tmp; 118 PetscErrorCode ierr; 119 PetscInt m=a->mbs,bs=A->rmap->bs,j; 120 const PetscInt *ai=a->i; 121 #if defined(USESHORT) 122 const unsigned short *aj=a->jshort,*vj,*vj1; 123 #else 124 const PetscInt *aj=a->j,*vj,*vj1; 125 #endif 126 PetscInt nz,nz1,i; 127 128 PetscFunctionBegin; 129 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 130 131 its = its*lits; 132 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 133 134 if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 135 136 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 137 if (xx != bb) { 138 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 139 } else { 140 b = x; 141 } 142 143 if (!a->idiagvalid) { 144 if (!a->idiag) { 145 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 146 } 147 for (i=0; i<a->mbs; i++) a->idiag[i] = 1.0/a->a[a->i[i]]; 148 a->idiagvalid = PETSC_TRUE; 149 } 150 151 if (!a->sor_work) { 152 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 153 } 154 t = a->sor_work; 155 156 aidiag = a->idiag; 157 158 if (flag == SOR_APPLY_UPPER) { 159 /* apply (U + D/omega) to the vector */ 160 PetscScalar d; 161 for (i=0; i<m; i++) { 162 d = fshift + aa[ai[i]]; 163 nz = ai[i+1] - ai[i] - 1; 164 vj = aj + ai[i] + 1; 165 v = aa + ai[i] + 1; 166 sum = b[i]*d/omega; 167 PetscSparseDensePlusDot(sum,b,v,vj,nz); 168 x[i] = sum; 169 } 170 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 171 } 172 173 if (flag & SOR_ZERO_INITIAL_GUESS) { 174 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 175 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 176 177 v = aa + 1; 178 vj = aj + 1; 179 for (i=0; i<m; i++){ 180 nz = ai[i+1] - ai[i] - 1; 181 tmp = - (x[i] = omega*t[i]*aidiag[i]); 182 for (j=0; j<nz; j++) { 183 t[vj[j]] += tmp*v[j]; 184 } 185 v += nz + 1; 186 vj += nz + 1; 187 } 188 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 189 } 190 191 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 192 int nz2; 193 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 194 #if defined(PETSC_USE_BACKWARD_LOOP) 195 v = aa + ai[m] - 1; 196 vj = aj + ai[m] - 1; 197 for (i=m-1; i>=0; i--){ 198 sum = b[i]; 199 nz = ai[i+1] - ai[i] - 1; 200 {PetscInt __i;for(__i=0;__i<nz;__i++) sum -= v[-__i] * x[vj[-__i]];} 201 #else 202 v = aa + ai[m-1] + 1; 203 vj = aj + ai[m-1] + 1; 204 nz = 0; 205 for (i=m-1; i>=0; i--){ 206 sum = b[i]; 207 nz2 = ai[i] - ai[i-1] - 1; 208 PETSC_Prefetch(v-nz2-1,0,1); 209 PETSC_Prefetch(vj-nz2-1,0,1); 210 PetscSparseDenseMinusDot(sum,x,v,vj,nz); 211 nz = nz2; 212 #endif 213 x[i] = omega*sum*aidiag[i]; 214 v -= nz + 1; 215 vj -= nz + 1; 216 } 217 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 218 } else { 219 v = aa + ai[m-1] + 1; 220 vj = aj + ai[m-1] + 1; 221 nz = 0; 222 for (i=m-1; i>=0; i--){ 223 sum = t[i]; 224 nz2 = ai[i] - ai[i-1] - 1; 225 PETSC_Prefetch(v-nz2-1,0,1); 226 PETSC_Prefetch(vj-nz2-1,0,1); 227 PetscSparseDenseMinusDot(sum,x,v,vj,nz); 228 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 229 nz = nz2; 230 v -= nz + 1; 231 vj -= nz + 1; 232 } 233 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 234 } 235 } 236 its--; 237 } 238 239 while (its--) { 240 /* 241 forward sweep: 242 for i=0,...,m-1: 243 sum[i] = (b[i] - U(i,:)x )/d[i]; 244 x[i] = (1-omega)x[i] + omega*sum[i]; 245 b = b - x[i]*U^T(i,:); 246 247 */ 248 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 249 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 250 251 for (i=0; i<m; i++){ 252 v = aa + ai[i] + 1; v1=v; 253 vj = aj + ai[i] + 1; vj1=vj; 254 nz = ai[i+1] - ai[i] - 1; nz1=nz; 255 sum = t[i]; 256 ierr = PetscLogFlops(4.0*nz-2);CHKERRQ(ierr); 257 while (nz1--) sum -= (*v1++)*x[*vj1++]; 258 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 259 while (nz--) t[*vj++] -= x[i]*(*v++); 260 } 261 } 262 263 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 264 /* 265 backward sweep: 266 b = b - x[i]*U^T(i,:), i=0,...,n-2 267 for i=m-1,...,0: 268 sum[i] = (b[i] - U(i,:)x )/d[i]; 269 x[i] = (1-omega)x[i] + omega*sum[i]; 270 */ 271 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 272 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 273 274 for (i=0; i<m-1; i++){ /* update rhs */ 275 v = aa + ai[i] + 1; 276 vj = aj + ai[i] + 1; 277 nz = ai[i+1] - ai[i] - 1; 278 ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr); 279 while (nz--) t[*vj++] -= x[i]*(*v++); 280 } 281 for (i=m-1; i>=0; i--){ 282 v = aa + ai[i] + 1; 283 vj = aj + ai[i] + 1; 284 nz = ai[i+1] - ai[i] - 1; 285 ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr); 286 sum = t[i]; 287 while (nz--) sum -= x[*vj++]*(*v++); 288 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 289 } 290 } 291 } 292 293 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 294 if (bb != xx) { 295 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 296 } 297 PetscFunctionReturn(0); 298 } 299