1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for SBAIJ format. 5 */ 6 7 #include "src/mat/impls/sbaij/seq/sbaij.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/inline/ilu.h" 10 #include "src/inline/dot.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 14 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 15 { 16 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 17 IS isrow=a->row; 18 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 19 PetscErrorCode ierr; 20 PetscInt nz,*vj,k,*r,idx,k1; 21 PetscInt bs=A->rmap->bs,bs2 = a->bs2; 22 MatScalar *aa=a->a,*v,*diag; 23 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 24 25 PetscFunctionBegin; 26 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28 t = a->solve_work; 29 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 30 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 31 32 /* solve U^T * D * y = b by forward substitution */ 33 xk = t; 34 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 35 idx = bs*r[k]; 36 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 37 } 38 for (k=0; k<mbs; k++){ 39 v = aa + bs2*ai[k]; 40 xk = t + k*bs; /* Dk*xk = k-th block of x */ 41 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 42 nz = ai[k+1] - ai[k]; 43 vj = aj + ai[k]; 44 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 45 while (nz--) { 46 /* x(:) += U(k,:)^T*(Dk*xk) */ 47 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 48 vj++; xj = t + (*vj)*bs; 49 v += bs2; 50 } 51 /* xk = inv(Dk)*(Dk*xk) */ 52 diag = aa+k*bs2; /* ptr to inv(Dk) */ 53 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 54 } 55 56 /* solve U*x = y by back substitution */ 57 for (k=mbs-1; k>=0; k--){ 58 v = aa + bs2*ai[k]; 59 xk = t + k*bs; /* xk */ 60 nz = ai[k+1] - ai[k]; 61 vj = aj + ai[k]; 62 xj = t + (*vj)*bs; 63 while (nz--) { 64 /* xk += U(k,:)*x(:) */ 65 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 66 vj++; 67 v += bs2; xj = t + (*vj)*bs; 68 } 69 idx = bs*r[k]; 70 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 71 } 72 73 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 74 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 75 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 76 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 77 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N" 83 PetscErrorCode MatForwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 84 { 85 PetscFunctionBegin; 86 SETERRQ(1,"not implemented yet"); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N" 92 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 93 { 94 PetscFunctionBegin; 95 SETERRQ(1,"not implemented yet"); 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 101 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 102 { 103 PetscErrorCode ierr; 104 PetscInt nz,*vj,k; 105 PetscInt bs2 = bs*bs; 106 MatScalar *v,*diag; 107 PetscScalar *xk,*xj,*xk_tmp; 108 109 PetscFunctionBegin; 110 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 111 for (k=0; k<mbs; k++){ 112 v = aa + bs2*ai[k]; 113 xk = x + k*bs; /* Dk*xk = k-th block of x */ 114 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 115 nz = ai[k+1] - ai[k]; 116 vj = aj + ai[k]; 117 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 118 while (nz--) { 119 /* x(:) += U(k,:)^T*(Dk*xk) */ 120 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 121 vj++; xj = x + (*vj)*bs; 122 v += bs2; 123 } 124 /* xk = inv(Dk)*(Dk*xk) */ 125 diag = aa+k*bs2; /* ptr to inv(Dk) */ 126 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 127 } 128 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 134 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 135 { 136 PetscInt nz,*vj,k; 137 PetscInt bs2 = bs*bs; 138 MatScalar *v; 139 PetscScalar *xk,*xj; 140 141 PetscFunctionBegin; 142 for (k=mbs-1; k>=0; k--){ 143 v = aa + bs2*ai[k]; 144 xk = x + k*bs; /* xk */ 145 nz = ai[k+1] - ai[k]; 146 vj = aj + ai[k]; 147 xj = x + (*vj)*bs; 148 while (nz--) { 149 /* xk += U(k,:)*x(:) */ 150 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 151 vj++; 152 v += bs2; xj = x + (*vj)*bs; 153 } 154 } 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 160 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 161 { 162 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 163 PetscErrorCode ierr; 164 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 165 PetscInt bs=A->rmap->bs; 166 MatScalar *aa=a->a; 167 PetscScalar *x,*b; 168 #if defined(PETSC_USE_LOG) 169 PetscInt bs2 = a->bs2; 170 #endif 171 172 PetscFunctionBegin; 173 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 176 /* solve U^T * D * y = b by forward substitution */ 177 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 178 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 179 180 /* solve U*x = y by back substitution */ 181 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 182 183 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 184 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 185 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering" 191 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 192 { 193 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 194 PetscErrorCode ierr; 195 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 196 PetscInt bs=A->rmap->bs; 197 MatScalar *aa=a->a; 198 PetscScalar *x,*b; 199 #if defined(PETSC_USE_LOG) 200 PetscInt bs2 = a->bs2; 201 #endif 202 203 PetscFunctionBegin; 204 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 205 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 206 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 207 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 208 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 209 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 210 ierr = PetscLogFlops(bs2*a->nz + A->rmap->N);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering" 216 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 217 { 218 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 219 PetscErrorCode ierr; 220 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 221 PetscInt bs=A->rmap->bs; 222 MatScalar *aa=a->a; 223 PetscScalar *x,*b; 224 #if defined(PETSC_USE_LOG) 225 PetscInt bs2 = a->bs2; 226 #endif 227 228 PetscFunctionBegin; 229 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 230 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 231 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 232 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 233 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 241 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 242 { 243 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 244 IS isrow=a->row; 245 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 246 PetscErrorCode ierr; 247 PetscInt nz,*vj,k,*r,idx; 248 MatScalar *aa=a->a,*v,*d; 249 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 250 251 PetscFunctionBegin; 252 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 253 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 254 t = a->solve_work; 255 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 256 257 /* solve U^T * D * y = b by forward substitution */ 258 tp = t; 259 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 260 idx = 7*r[k]; 261 tp[0] = b[idx]; 262 tp[1] = b[idx+1]; 263 tp[2] = b[idx+2]; 264 tp[3] = b[idx+3]; 265 tp[4] = b[idx+4]; 266 tp[5] = b[idx+5]; 267 tp[6] = b[idx+6]; 268 tp += 7; 269 } 270 271 for (k=0; k<mbs; k++){ 272 v = aa + 49*ai[k]; 273 vj = aj + ai[k]; 274 tp = t + k*7; 275 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 276 nz = ai[k+1] - ai[k]; 277 tp = t + (*vj)*7; 278 while (nz--) { 279 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 280 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 281 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 282 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 283 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 284 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 285 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 286 vj++; tp = t + (*vj)*7; 287 v += 49; 288 } 289 290 /* xk = inv(Dk)*(Dk*xk) */ 291 d = aa+k*49; /* ptr to inv(Dk) */ 292 tp = t + k*7; 293 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 294 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 295 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 296 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 297 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 298 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 299 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 300 } 301 302 /* solve U*x = y by back substitution */ 303 for (k=mbs-1; k>=0; k--){ 304 v = aa + 49*ai[k]; 305 vj = aj + ai[k]; 306 tp = t + k*7; 307 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 308 nz = ai[k+1] - ai[k]; 309 310 tp = t + (*vj)*7; 311 while (nz--) { 312 /* xk += U(k,:)*x(:) */ 313 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 314 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 315 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 316 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 317 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 318 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 319 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 320 vj++; tp = t + (*vj)*7; 321 v += 49; 322 } 323 tp = t + k*7; 324 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 325 idx = 7*r[k]; 326 x[idx] = x0; 327 x[idx+1] = x1; 328 x[idx+2] = x2; 329 x[idx+3] = x3; 330 x[idx+4] = x4; 331 x[idx+5] = x5; 332 x[idx+6] = x6; 333 } 334 335 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 336 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 337 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 338 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 339 PetscFunctionReturn(0); 340 } 341 342 #undef __FUNCT__ 343 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 344 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 345 { 346 MatScalar *v,*d; 347 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 348 PetscInt nz,*vj,k; 349 350 PetscFunctionBegin; 351 for (k=0; k<mbs; k++){ 352 v = aa + 49*ai[k]; 353 xp = x + k*7; 354 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 355 nz = ai[k+1] - ai[k]; 356 vj = aj + ai[k]; 357 xp = x + (*vj)*7; 358 while (nz--) { 359 /* x(:) += U(k,:)^T*(Dk*xk) */ 360 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 361 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 362 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 363 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 364 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 365 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 366 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 367 vj++; xp = x + (*vj)*7; 368 v += 49; 369 } 370 /* xk = inv(Dk)*(Dk*xk) */ 371 d = aa+k*49; /* ptr to inv(Dk) */ 372 xp = x + k*7; 373 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 374 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 375 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 376 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 377 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 378 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 379 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 380 } 381 PetscFunctionReturn(0); 382 } 383 384 #undef __FUNCT__ 385 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 386 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 387 { 388 MatScalar *v; 389 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 390 PetscInt nz,*vj,k; 391 392 PetscFunctionBegin; 393 for (k=mbs-1; k>=0; k--){ 394 v = aa + 49*ai[k]; 395 xp = x + k*7; 396 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 397 nz = ai[k+1] - ai[k]; 398 vj = aj + ai[k]; 399 xp = x + (*vj)*7; 400 while (nz--) { 401 /* xk += U(k,:)*x(:) */ 402 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 403 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 404 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 405 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 406 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 407 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 408 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 409 vj++; 410 v += 49; xp = x + (*vj)*7; 411 } 412 xp = x + k*7; 413 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 414 } 415 PetscFunctionReturn(0); 416 } 417 418 #undef __FUNCT__ 419 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 420 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 421 { 422 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 423 PetscErrorCode ierr; 424 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 425 MatScalar *aa=a->a; 426 PetscScalar *x,*b; 427 428 PetscFunctionBegin; 429 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 430 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 431 432 /* solve U^T * D * y = b by forward substitution */ 433 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 434 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 435 436 /* solve U*x = y by back substitution */ 437 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 438 439 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 440 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 441 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 442 PetscFunctionReturn(0); 443 } 444 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering" 447 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 448 { 449 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 450 PetscErrorCode ierr; 451 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 452 MatScalar *aa=a->a; 453 PetscScalar *x,*b; 454 455 PetscFunctionBegin; 456 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 457 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 458 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 459 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 460 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 461 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 462 ierr = PetscLogFlops(49*a->nz + mbs);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering" 468 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 469 { 470 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 471 PetscErrorCode ierr; 472 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 473 MatScalar *aa=a->a; 474 PetscScalar *x,*b; 475 476 PetscFunctionBegin; 477 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 478 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 479 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 480 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 481 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 482 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 483 ierr = PetscLogFlops(49*a->nz);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 489 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 490 { 491 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 492 IS isrow=a->row; 493 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 494 PetscErrorCode ierr; 495 PetscInt nz,*vj,k,*r,idx; 496 MatScalar *aa=a->a,*v,*d; 497 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 498 499 PetscFunctionBegin; 500 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 501 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 502 t = a->solve_work; 503 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 504 505 /* solve U^T * D * y = b by forward substitution */ 506 tp = t; 507 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 508 idx = 6*r[k]; 509 tp[0] = b[idx]; 510 tp[1] = b[idx+1]; 511 tp[2] = b[idx+2]; 512 tp[3] = b[idx+3]; 513 tp[4] = b[idx+4]; 514 tp[5] = b[idx+5]; 515 tp += 6; 516 } 517 518 for (k=0; k<mbs; k++){ 519 v = aa + 36*ai[k]; 520 vj = aj + ai[k]; 521 tp = t + k*6; 522 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 523 nz = ai[k+1] - ai[k]; 524 tp = t + (*vj)*6; 525 while (nz--) { 526 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 527 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 528 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 529 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 530 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 531 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 532 vj++; tp = t + (*vj)*6; 533 v += 36; 534 } 535 536 /* xk = inv(Dk)*(Dk*xk) */ 537 d = aa+k*36; /* ptr to inv(Dk) */ 538 tp = t + k*6; 539 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 540 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 541 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 542 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 543 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 544 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 545 } 546 547 /* solve U*x = y by back substitution */ 548 for (k=mbs-1; k>=0; k--){ 549 v = aa + 36*ai[k]; 550 vj = aj + ai[k]; 551 tp = t + k*6; 552 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 553 nz = ai[k+1] - ai[k]; 554 555 tp = t + (*vj)*6; 556 while (nz--) { 557 /* xk += U(k,:)*x(:) */ 558 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 559 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 560 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 561 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 562 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 563 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 564 vj++; tp = t + (*vj)*6; 565 v += 36; 566 } 567 tp = t + k*6; 568 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 569 idx = 6*r[k]; 570 x[idx] = x0; 571 x[idx+1] = x1; 572 x[idx+2] = x2; 573 x[idx+3] = x3; 574 x[idx+4] = x4; 575 x[idx+5] = x5; 576 } 577 578 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 579 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 580 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 581 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 587 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 588 { 589 MatScalar *v,*d; 590 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 591 PetscInt nz,*vj,k; 592 593 PetscFunctionBegin; 594 for (k=0; k<mbs; k++){ 595 v = aa + 36*ai[k]; 596 xp = x + k*6; 597 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 598 nz = ai[k+1] - ai[k]; 599 vj = aj + ai[k]; 600 xp = x + (*vj)*6; 601 while (nz--) { 602 /* x(:) += U(k,:)^T*(Dk*xk) */ 603 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 604 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 605 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 606 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 607 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 608 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 609 vj++; xp = x + (*vj)*6; 610 v += 36; 611 } 612 /* xk = inv(Dk)*(Dk*xk) */ 613 d = aa+k*36; /* ptr to inv(Dk) */ 614 xp = x + k*6; 615 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 616 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 617 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 618 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 619 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 620 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 621 } 622 PetscFunctionReturn(0); 623 } 624 #undef __FUNCT__ 625 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 626 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 627 { 628 MatScalar *v; 629 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 630 PetscInt nz,*vj,k; 631 632 PetscFunctionBegin; 633 for (k=mbs-1; k>=0; k--){ 634 v = aa + 36*ai[k]; 635 xp = x + k*6; 636 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 637 nz = ai[k+1] - ai[k]; 638 vj = aj + ai[k]; 639 xp = x + (*vj)*6; 640 while (nz--) { 641 /* xk += U(k,:)*x(:) */ 642 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 643 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 644 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 645 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 646 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 647 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 648 vj++; 649 v += 36; xp = x + (*vj)*6; 650 } 651 xp = x + k*6; 652 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 653 } 654 PetscFunctionReturn(0); 655 } 656 657 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 660 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 661 { 662 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 663 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 664 MatScalar *aa=a->a; 665 PetscScalar *x,*b; 666 PetscErrorCode ierr; 667 668 PetscFunctionBegin; 669 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 670 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 671 672 /* solve U^T * D * y = b by forward substitution */ 673 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 674 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 675 676 /* solve U*x = y by back substitution */ 677 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 678 679 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 680 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 681 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering" 687 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 688 { 689 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 690 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 691 MatScalar *aa=a->a; 692 PetscScalar *x,*b; 693 PetscErrorCode ierr; 694 695 PetscFunctionBegin; 696 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 697 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 698 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 699 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 700 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 701 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 702 ierr = PetscLogFlops(36*a->nz + mbs);CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNCT__ 707 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering" 708 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 709 { 710 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 711 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 712 MatScalar *aa=a->a; 713 PetscScalar *x,*b; 714 PetscErrorCode ierr; 715 716 PetscFunctionBegin; 717 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 718 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 719 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 720 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 721 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 722 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 723 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 729 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 730 { 731 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 732 IS isrow=a->row; 733 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 734 PetscErrorCode ierr; 735 PetscInt nz,*vj,k,*r,idx; 736 MatScalar *aa=a->a,*v,*diag; 737 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 738 739 PetscFunctionBegin; 740 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 741 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 742 t = a->solve_work; 743 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 744 745 /* solve U^T * D * y = b by forward substitution */ 746 tp = t; 747 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 748 idx = 5*r[k]; 749 tp[0] = b[idx]; 750 tp[1] = b[idx+1]; 751 tp[2] = b[idx+2]; 752 tp[3] = b[idx+3]; 753 tp[4] = b[idx+4]; 754 tp += 5; 755 } 756 757 for (k=0; k<mbs; k++){ 758 v = aa + 25*ai[k]; 759 vj = aj + ai[k]; 760 tp = t + k*5; 761 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 762 nz = ai[k+1] - ai[k]; 763 764 tp = t + (*vj)*5; 765 while (nz--) { 766 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 767 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 768 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 769 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 770 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 771 vj++; tp = t + (*vj)*5; 772 v += 25; 773 } 774 775 /* xk = inv(Dk)*(Dk*xk) */ 776 diag = aa+k*25; /* ptr to inv(Dk) */ 777 tp = t + k*5; 778 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 779 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 780 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 781 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 782 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 783 } 784 785 /* solve U*x = y by back substitution */ 786 for (k=mbs-1; k>=0; k--){ 787 v = aa + 25*ai[k]; 788 vj = aj + ai[k]; 789 tp = t + k*5; 790 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 791 nz = ai[k+1] - ai[k]; 792 793 tp = t + (*vj)*5; 794 while (nz--) { 795 /* xk += U(k,:)*x(:) */ 796 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 797 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 798 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 799 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 800 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 801 vj++; tp = t + (*vj)*5; 802 v += 25; 803 } 804 tp = t + k*5; 805 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 806 idx = 5*r[k]; 807 x[idx] = x0; 808 x[idx+1] = x1; 809 x[idx+2] = x2; 810 x[idx+3] = x3; 811 x[idx+4] = x4; 812 } 813 814 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 815 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 816 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 817 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNCT__ 822 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 823 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 824 { 825 MatScalar *v,*diag; 826 PetscScalar *xp,x0,x1,x2,x3,x4; 827 PetscInt nz,*vj,k; 828 829 PetscFunctionBegin; 830 for (k=0; k<mbs; k++){ 831 v = aa + 25*ai[k]; 832 xp = x + k*5; 833 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 834 nz = ai[k+1] - ai[k]; 835 vj = aj + ai[k]; 836 xp = x + (*vj)*5; 837 while (nz--) { 838 /* x(:) += U(k,:)^T*(Dk*xk) */ 839 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 840 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 841 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 842 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 843 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 844 vj++; xp = x + (*vj)*5; 845 v += 25; 846 } 847 /* xk = inv(Dk)*(Dk*xk) */ 848 diag = aa+k*25; /* ptr to inv(Dk) */ 849 xp = x + k*5; 850 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 851 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 852 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 853 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 854 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 855 } 856 PetscFunctionReturn(0); 857 } 858 859 #undef __FUNCT__ 860 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 861 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 862 { 863 MatScalar *v; 864 PetscScalar *xp,x0,x1,x2,x3,x4; 865 PetscInt nz,*vj,k; 866 867 PetscFunctionBegin; 868 for (k=mbs-1; k>=0; k--){ 869 v = aa + 25*ai[k]; 870 xp = x + k*5; 871 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 872 nz = ai[k+1] - ai[k]; 873 vj = aj + ai[k]; 874 xp = x + (*vj)*5; 875 while (nz--) { 876 /* xk += U(k,:)*x(:) */ 877 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 878 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 879 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 880 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 881 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 882 vj++; 883 v += 25; xp = x + (*vj)*5; 884 } 885 xp = x + k*5; 886 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 887 } 888 PetscFunctionReturn(0); 889 } 890 891 #undef __FUNCT__ 892 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 893 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 894 { 895 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 896 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 897 MatScalar *aa=a->a; 898 PetscScalar *x,*b; 899 PetscErrorCode ierr; 900 901 PetscFunctionBegin; 902 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 903 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 904 905 /* solve U^T * D * y = b by forward substitution */ 906 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 907 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 908 909 /* solve U*x = y by back substitution */ 910 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 911 912 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 913 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 914 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 915 PetscFunctionReturn(0); 916 } 917 918 #undef __FUNCT__ 919 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering" 920 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 921 { 922 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 923 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 924 MatScalar *aa=a->a; 925 PetscScalar *x,*b; 926 PetscErrorCode ierr; 927 928 PetscFunctionBegin; 929 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 930 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 931 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 932 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 933 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 934 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 935 ierr = PetscLogFlops(25*(a->nz + mbs));CHKERRQ(ierr); 936 PetscFunctionReturn(0); 937 } 938 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering" 941 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 942 { 943 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 944 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 945 MatScalar *aa=a->a; 946 PetscScalar *x,*b; 947 PetscErrorCode ierr; 948 949 PetscFunctionBegin; 950 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 951 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 952 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 953 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 954 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 955 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 956 ierr = PetscLogFlops(25*a->nz);CHKERRQ(ierr); 957 PetscFunctionReturn(0); 958 } 959 960 #undef __FUNCT__ 961 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 962 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 963 { 964 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 965 IS isrow=a->row; 966 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 967 PetscErrorCode ierr; 968 PetscInt nz,*vj,k,*r,idx; 969 MatScalar *aa=a->a,*v,*diag; 970 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 971 972 PetscFunctionBegin; 973 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 974 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 975 t = a->solve_work; 976 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 977 978 /* solve U^T * D * y = b by forward substitution */ 979 tp = t; 980 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 981 idx = 4*r[k]; 982 tp[0] = b[idx]; 983 tp[1] = b[idx+1]; 984 tp[2] = b[idx+2]; 985 tp[3] = b[idx+3]; 986 tp += 4; 987 } 988 989 for (k=0; k<mbs; k++){ 990 v = aa + 16*ai[k]; 991 vj = aj + ai[k]; 992 tp = t + k*4; 993 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 994 nz = ai[k+1] - ai[k]; 995 996 tp = t + (*vj)*4; 997 while (nz--) { 998 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 999 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1000 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1001 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1002 vj++; tp = t + (*vj)*4; 1003 v += 16; 1004 } 1005 1006 /* xk = inv(Dk)*(Dk*xk) */ 1007 diag = aa+k*16; /* ptr to inv(Dk) */ 1008 tp = t + k*4; 1009 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1010 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1011 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1012 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1013 } 1014 1015 /* solve U*x = y by back substitution */ 1016 for (k=mbs-1; k>=0; k--){ 1017 v = aa + 16*ai[k]; 1018 vj = aj + ai[k]; 1019 tp = t + k*4; 1020 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 1021 nz = ai[k+1] - ai[k]; 1022 1023 tp = t + (*vj)*4; 1024 while (nz--) { 1025 /* xk += U(k,:)*x(:) */ 1026 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 1027 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 1028 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 1029 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 1030 vj++; tp = t + (*vj)*4; 1031 v += 16; 1032 } 1033 tp = t + k*4; 1034 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 1035 idx = 4*r[k]; 1036 x[idx] = x0; 1037 x[idx+1] = x1; 1038 x[idx+2] = x2; 1039 x[idx+3] = x3; 1040 } 1041 1042 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1043 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1044 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1045 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 1046 PetscFunctionReturn(0); 1047 } 1048 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1051 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1052 { 1053 MatScalar *v,*diag; 1054 PetscScalar *xp,x0,x1,x2,x3; 1055 PetscInt nz,*vj,k; 1056 1057 PetscFunctionBegin; 1058 for (k=0; k<mbs; k++){ 1059 v = aa + 16*ai[k]; 1060 xp = x + k*4; 1061 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 1062 nz = ai[k+1] - ai[k]; 1063 vj = aj + ai[k]; 1064 xp = x + (*vj)*4; 1065 while (nz--) { 1066 /* x(:) += U(k,:)^T*(Dk*xk) */ 1067 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1068 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1069 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1070 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1071 vj++; xp = x + (*vj)*4; 1072 v += 16; 1073 } 1074 /* xk = inv(Dk)*(Dk*xk) */ 1075 diag = aa+k*16; /* ptr to inv(Dk) */ 1076 xp = x + k*4; 1077 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1078 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1079 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1080 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1081 } 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1087 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1088 { 1089 MatScalar *v; 1090 PetscScalar *xp,x0,x1,x2,x3; 1091 PetscInt nz,*vj,k; 1092 1093 PetscFunctionBegin; 1094 for (k=mbs-1; k>=0; k--){ 1095 v = aa + 16*ai[k]; 1096 xp = x + k*4; 1097 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 1098 nz = ai[k+1] - ai[k]; 1099 vj = aj + ai[k]; 1100 xp = x + (*vj)*4; 1101 while (nz--) { 1102 /* xk += U(k,:)*x(:) */ 1103 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 1104 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 1105 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 1106 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 1107 vj++; 1108 v += 16; xp = x + (*vj)*4; 1109 } 1110 xp = x + k*4; 1111 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 1112 } 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 1118 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1119 { 1120 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1121 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1122 MatScalar *aa=a->a; 1123 PetscScalar *x,*b; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1128 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1129 1130 /* solve U^T * D * y = b by forward substitution */ 1131 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1132 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1133 1134 /* solve U*x = y by back substitution */ 1135 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1136 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1137 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1138 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 #undef __FUNCT__ 1143 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering" 1144 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1145 { 1146 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1147 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1148 MatScalar *aa=a->a; 1149 PetscScalar *x,*b; 1150 PetscErrorCode ierr; 1151 1152 PetscFunctionBegin; 1153 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1154 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1155 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1156 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1157 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1158 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1159 ierr = PetscLogFlops(16*a->nz + mbs);CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering" 1165 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1166 { 1167 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1168 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1169 MatScalar *aa=a->a; 1170 PetscScalar *x,*b; 1171 PetscErrorCode ierr; 1172 1173 PetscFunctionBegin; 1174 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1175 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1176 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1177 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1178 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1179 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1180 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 #undef __FUNCT__ 1185 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 1186 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 1187 { 1188 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1189 IS isrow=a->row; 1190 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1191 PetscErrorCode ierr; 1192 PetscInt nz,*vj,k,*r,idx; 1193 MatScalar *aa=a->a,*v,*diag; 1194 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 1195 1196 PetscFunctionBegin; 1197 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1198 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1199 t = a->solve_work; 1200 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1201 1202 /* solve U^T * D * y = b by forward substitution */ 1203 tp = t; 1204 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1205 idx = 3*r[k]; 1206 tp[0] = b[idx]; 1207 tp[1] = b[idx+1]; 1208 tp[2] = b[idx+2]; 1209 tp += 3; 1210 } 1211 1212 for (k=0; k<mbs; k++){ 1213 v = aa + 9*ai[k]; 1214 vj = aj + ai[k]; 1215 tp = t + k*3; 1216 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 1217 nz = ai[k+1] - ai[k]; 1218 1219 tp = t + (*vj)*3; 1220 while (nz--) { 1221 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1222 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1223 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1224 vj++; tp = t + (*vj)*3; 1225 v += 9; 1226 } 1227 1228 /* xk = inv(Dk)*(Dk*xk) */ 1229 diag = aa+k*9; /* ptr to inv(Dk) */ 1230 tp = t + k*3; 1231 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1232 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1233 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1234 } 1235 1236 /* solve U*x = y by back substitution */ 1237 for (k=mbs-1; k>=0; k--){ 1238 v = aa + 9*ai[k]; 1239 vj = aj + ai[k]; 1240 tp = t + k*3; 1241 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 1242 nz = ai[k+1] - ai[k]; 1243 1244 tp = t + (*vj)*3; 1245 while (nz--) { 1246 /* xk += U(k,:)*x(:) */ 1247 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 1248 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 1249 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 1250 vj++; tp = t + (*vj)*3; 1251 v += 9; 1252 } 1253 tp = t + k*3; 1254 tp[0] = x0; tp[1] = x1; tp[2] = x2; 1255 idx = 3*r[k]; 1256 x[idx] = x0; 1257 x[idx+1] = x1; 1258 x[idx+2] = x2; 1259 } 1260 1261 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1262 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1263 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1264 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1270 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1271 { 1272 MatScalar *v,*diag; 1273 PetscScalar *xp,x0,x1,x2; 1274 PetscInt nz,*vj,k; 1275 1276 PetscFunctionBegin; 1277 for (k=0; k<mbs; k++){ 1278 v = aa + 9*ai[k]; 1279 xp = x + k*3; 1280 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1281 nz = ai[k+1] - ai[k]; 1282 vj = aj + ai[k]; 1283 xp = x + (*vj)*3; 1284 while (nz--) { 1285 /* x(:) += U(k,:)^T*(Dk*xk) */ 1286 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1287 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1288 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1289 vj++; xp = x + (*vj)*3; 1290 v += 9; 1291 } 1292 /* xk = inv(Dk)*(Dk*xk) */ 1293 diag = aa+k*9; /* ptr to inv(Dk) */ 1294 xp = x + k*3; 1295 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1296 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1297 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1298 } 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1304 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1305 { 1306 MatScalar *v; 1307 PetscScalar *xp,x0,x1,x2; 1308 PetscInt nz,*vj,k; 1309 1310 PetscFunctionBegin; 1311 for (k=mbs-1; k>=0; k--){ 1312 v = aa + 9*ai[k]; 1313 xp = x + k*3; 1314 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1315 nz = ai[k+1] - ai[k]; 1316 vj = aj + ai[k]; 1317 xp = x + (*vj)*3; 1318 while (nz--) { 1319 /* xk += U(k,:)*x(:) */ 1320 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1321 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1322 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1323 vj++; 1324 v += 9; xp = x + (*vj)*3; 1325 } 1326 xp = x + k*3; 1327 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1328 } 1329 PetscFunctionReturn(0); 1330 } 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 1334 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1335 { 1336 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1337 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1338 MatScalar *aa=a->a; 1339 PetscScalar *x,*b; 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1344 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1345 1346 /* solve U^T * D * y = b by forward substitution */ 1347 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1348 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1349 1350 /* solve U*x = y by back substitution */ 1351 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1352 1353 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1354 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1355 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering" 1361 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1362 { 1363 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1364 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1365 MatScalar *aa=a->a; 1366 PetscScalar *x,*b; 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1371 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1372 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1373 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1374 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1375 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1376 ierr = PetscLogFlops(9*(a->nz + mbs));CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering" 1382 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1383 { 1384 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1385 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1386 MatScalar *aa=a->a; 1387 PetscScalar *x,*b; 1388 PetscErrorCode ierr; 1389 1390 PetscFunctionBegin; 1391 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1392 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1393 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1394 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1395 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1396 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1397 ierr = PetscLogFlops(9*a->nz);CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #undef __FUNCT__ 1402 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 1403 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 1404 { 1405 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1406 IS isrow=a->row; 1407 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1408 PetscErrorCode ierr; 1409 PetscInt nz,*vj,k,k2,*r,idx; 1410 MatScalar *aa=a->a,*v,*diag; 1411 PetscScalar *x,*b,x0,x1,*t; 1412 1413 PetscFunctionBegin; 1414 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1415 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1416 t = a->solve_work; 1417 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1418 1419 /* solve U^T * D * y = perm(b) by forward substitution */ 1420 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1421 idx = 2*r[k]; 1422 t[k*2] = b[idx]; 1423 t[k*2+1] = b[idx+1]; 1424 } 1425 for (k=0; k<mbs; k++){ 1426 v = aa + 4*ai[k]; 1427 vj = aj + ai[k]; 1428 k2 = k*2; 1429 x0 = t[k2]; x1 = t[k2+1]; 1430 nz = ai[k+1] - ai[k]; 1431 while (nz--) { 1432 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1433 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1434 vj++; v += 4; 1435 } 1436 diag = aa+k*4; /* ptr to inv(Dk) */ 1437 t[k2] = diag[0]*x0 + diag[2]*x1; 1438 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1439 } 1440 1441 /* solve U*x = y by back substitution */ 1442 for (k=mbs-1; k>=0; k--){ 1443 v = aa + 4*ai[k]; 1444 vj = aj + ai[k]; 1445 k2 = k*2; 1446 x0 = t[k2]; x1 = t[k2+1]; 1447 nz = ai[k+1] - ai[k]; 1448 while (nz--) { 1449 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1450 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1451 vj++; v += 4; 1452 } 1453 t[k2] = x0; 1454 t[k2+1] = x1; 1455 idx = 2*r[k]; 1456 x[idx] = x0; 1457 x[idx+1] = x1; 1458 } 1459 1460 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1461 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1462 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1463 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1469 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1470 { 1471 MatScalar *v,*diag; 1472 PetscScalar x0,x1; 1473 PetscInt nz,*vj,k,k2; 1474 1475 PetscFunctionBegin; 1476 for (k=0; k<mbs; k++){ 1477 v = aa + 4*ai[k]; 1478 vj = aj + ai[k]; 1479 k2 = k*2; 1480 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1481 nz = ai[k+1] - ai[k]; 1482 1483 while (nz--) { 1484 /* x(:) += U(k,:)^T*(Dk*xk) */ 1485 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1486 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1487 vj++; v += 4; 1488 } 1489 /* xk = inv(Dk)*(Dk*xk) */ 1490 diag = aa+k*4; /* ptr to inv(Dk) */ 1491 x[k2] = diag[0]*x0 + diag[2]*x1; 1492 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1493 } 1494 PetscFunctionReturn(0); 1495 } 1496 1497 #undef __FUNCT__ 1498 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1499 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1500 { 1501 MatScalar *v; 1502 PetscScalar x0,x1; 1503 PetscInt nz,*vj,k,k2; 1504 1505 PetscFunctionBegin; 1506 for (k=mbs-1; k>=0; k--){ 1507 v = aa + 4*ai[k]; 1508 vj = aj + ai[k]; 1509 k2 = k*2; 1510 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1511 nz = ai[k+1] - ai[k]; 1512 while (nz--) { 1513 /* xk += U(k,:)*x(:) */ 1514 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1515 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1516 vj++; v += 4; 1517 } 1518 x[k2] = x0; 1519 x[k2+1] = x1; 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1526 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1527 { 1528 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1529 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1530 MatScalar *aa=a->a; 1531 PetscScalar *x,*b; 1532 PetscErrorCode ierr; 1533 1534 PetscFunctionBegin; 1535 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1536 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1537 1538 /* solve U^T * D * y = b by forward substitution */ 1539 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1540 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1541 1542 /* solve U*x = y by back substitution */ 1543 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1544 1545 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1546 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1547 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */ 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering" 1553 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1554 { 1555 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1556 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1557 MatScalar *aa=a->a; 1558 PetscScalar *x,*b; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1563 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1564 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1565 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1566 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1567 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1568 ierr = PetscLogFlops(4*(a->nz + mbs));CHKERRQ(ierr); 1569 PetscFunctionReturn(0); 1570 } 1571 1572 #undef __FUNCT__ 1573 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering" 1574 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1575 { 1576 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1577 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1578 MatScalar *aa=a->a; 1579 PetscScalar *x,*b; 1580 PetscErrorCode ierr; 1581 1582 PetscFunctionBegin; 1583 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1584 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1585 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1586 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1587 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1588 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1589 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 1590 PetscFunctionReturn(0); 1591 } 1592 1593 #undef __FUNCT__ 1594 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1595 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1596 { 1597 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1598 IS isrow=a->row; 1599 PetscErrorCode ierr; 1600 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1601 MatScalar *aa=a->a,*v; 1602 PetscScalar *x,*b,xk,*t; 1603 PetscInt nz,*vj,k; 1604 1605 PetscFunctionBegin; 1606 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1607 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1608 t = a->solve_work; 1609 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1610 1611 /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */ 1612 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1613 for (k=0; k<mbs; k++){ 1614 v = aa + ai[k] + 1; 1615 vj = aj + ai[k] + 1; 1616 xk = t[k]; 1617 nz = ai[k+1] - ai[k] - 1; 1618 while (nz--) t[*vj++] += (*v++) * xk; 1619 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1620 } 1621 1622 /* solve U*perm(x) = y by back substitution */ 1623 for (k=mbs-1; k>=0; k--){ 1624 v = aa + ai[k] + 1; 1625 vj = aj + ai[k] + 1; 1626 nz = ai[k+1] - ai[k] - 1; 1627 while (nz--) t[k] += (*v++) * t[*vj++]; 1628 x[rp[k]] = t[k]; 1629 } 1630 1631 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1632 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1633 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1634 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1" 1640 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1641 { 1642 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1643 IS isrow=a->row; 1644 PetscErrorCode ierr; 1645 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1646 MatScalar *aa=a->a,*v; 1647 PetscReal diagk; 1648 PetscScalar *x,*b,xk; 1649 PetscInt nz,*vj,k; 1650 1651 PetscFunctionBegin; 1652 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ 1653 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1654 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1655 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1656 1657 for (k=0; k<mbs; k++) x[k] = b[rp[k]]; 1658 for (k=0; k<mbs; k++){ 1659 v = aa + ai[k] + 1; 1660 vj = aj + ai[k] + 1; 1661 xk = x[k]; 1662 nz = ai[k+1] - ai[k] - 1; 1663 while (nz--) x[*vj++] += (*v++) * xk; 1664 1665 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1666 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1667 x[k] = xk*sqrt(diagk); 1668 } 1669 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1670 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1671 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1672 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1673 PetscFunctionReturn(0); 1674 } 1675 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1" 1678 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1679 { 1680 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1681 IS isrow=a->row; 1682 PetscErrorCode ierr; 1683 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1684 MatScalar *aa=a->a,*v; 1685 PetscReal diagk; 1686 PetscScalar *x,*b,*t; 1687 PetscInt nz,*vj,k; 1688 1689 PetscFunctionBegin; 1690 /* solve D^(1/2)*U*perm(x) = b by back substitution */ 1691 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1692 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1693 t = a->solve_work; 1694 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1695 1696 for (k=mbs-1; k>=0; k--){ 1697 v = aa + ai[k] + 1; 1698 vj = aj + ai[k] + 1; 1699 diagk = PetscRealPart(aa[ai[k]]); 1700 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1701 t[k] = b[k] * sqrt(diagk); 1702 nz = ai[k+1] - ai[k] - 1; 1703 while (nz--) t[k] += (*v++) * t[*vj++]; 1704 x[rp[k]] = t[k]; 1705 } 1706 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1707 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1708 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1709 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1710 PetscFunctionReturn(0); 1711 } 1712 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1715 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1716 { 1717 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1718 PetscErrorCode ierr; 1719 1720 PetscFunctionBegin; 1721 if (A->rmap->bs == 1) { 1722 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1723 } else { 1724 IS isrow=a->row; 1725 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,i; 1726 MatScalar *aa=a->a,*v; 1727 PetscScalar *x,*b,*t; 1728 PetscInt nz,*vj,k,n; 1729 if (bb->n > a->solves_work_n) { 1730 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1731 ierr = PetscMalloc(bb->n*A->rmap->N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1732 a->solves_work_n = bb->n; 1733 } 1734 n = bb->n; 1735 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1736 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1737 t = a->solves_work; 1738 1739 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1740 1741 /* solve U^T*D*y = perm(b) by forward substitution */ 1742 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */ 1743 for (k=0; k<mbs; k++){ 1744 v = aa + ai[k]; 1745 vj = aj + ai[k]; 1746 nz = ai[k+1] - ai[k]; 1747 while (nz--) { 1748 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1749 v++;vj++; 1750 } 1751 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1752 } 1753 1754 /* solve U*perm(x) = y by back substitution */ 1755 for (k=mbs-1; k>=0; k--){ 1756 v = aa + ai[k]; 1757 vj = aj + ai[k]; 1758 nz = ai[k+1] - ai[k]; 1759 while (nz--) { 1760 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1761 v++;vj++; 1762 } 1763 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i]; 1764 } 1765 1766 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1767 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1768 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1769 ierr = PetscLogFlops(bb->n*(4*a->nz + A->rmap->N));CHKERRQ(ierr); 1770 } 1771 PetscFunctionReturn(0); 1772 } 1773 1774 /* 1775 Special case where the matrix was ILU(0) factored in the natural 1776 ordering. This eliminates the need for the column and row permutation. 1777 */ 1778 #undef __FUNCT__ 1779 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1780 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1781 { 1782 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1783 PetscErrorCode ierr; 1784 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1785 MatScalar *aa=a->a,*v; 1786 PetscScalar *x,*b,xk; 1787 PetscInt nz,*vj,k; 1788 1789 PetscFunctionBegin; 1790 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1791 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1792 1793 /* solve U^T*D*y = b by forward substitution */ 1794 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1795 for (k=0; k<mbs; k++){ 1796 v = aa + ai[k] + 1; 1797 vj = aj + ai[k] + 1; 1798 xk = x[k]; 1799 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1800 while (nz--) x[*vj++] += (*v++) * xk; 1801 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1802 } 1803 1804 /* solve U*x = y by back substitution */ 1805 for (k=mbs-2; k>=0; k--){ 1806 v = aa + ai[k] + 1; 1807 vj = aj + ai[k] + 1; 1808 xk = x[k]; 1809 nz = ai[k+1] - ai[k] - 1; 1810 while (nz--) xk += (*v++) * x[*vj++]; 1811 x[k] = xk; 1812 } 1813 1814 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1815 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1816 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 1817 PetscFunctionReturn(0); 1818 } 1819 1820 #undef __FUNCT__ 1821 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering" 1822 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1823 { 1824 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1825 PetscErrorCode ierr; 1826 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1827 MatScalar *aa=a->a,*v; 1828 PetscReal diagk; 1829 PetscScalar *x,*b; 1830 PetscInt nz,*vj,k; 1831 1832 PetscFunctionBegin; 1833 /* solve U^T*D^(1/2)*x = b by forward substitution */ 1834 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1835 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1836 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1837 for (k=0; k<mbs; k++){ 1838 v = aa + ai[k] + 1; 1839 vj = aj + ai[k] + 1; 1840 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1841 while (nz--) x[*vj++] += (*v++) * x[k]; 1842 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1843 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]])); 1844 x[k] *= sqrt(diagk); 1845 } 1846 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1847 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1848 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1849 PetscFunctionReturn(0); 1850 } 1851 1852 #undef __FUNCT__ 1853 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering" 1854 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1855 { 1856 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1857 PetscErrorCode ierr; 1858 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1859 MatScalar *aa=a->a,*v; 1860 PetscReal diagk; 1861 PetscScalar *x,*b; 1862 PetscInt nz,*vj,k; 1863 1864 PetscFunctionBegin; 1865 /* solve D^(1/2)*U*x = b by back substitution */ 1866 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1867 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1868 1869 for (k=mbs-1; k>=0; k--){ 1870 v = aa + ai[k] + 1; 1871 vj = aj + ai[k] + 1; 1872 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1873 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1874 x[k] = sqrt(diagk)*b[k]; 1875 nz = ai[k+1] - ai[k] - 1; 1876 while (nz--) x[k] += (*v++) * x[*vj++]; 1877 } 1878 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1879 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1880 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1881 PetscFunctionReturn(0); 1882 } 1883 1884 extern PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat,PetscTruth); 1885 1886 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 1889 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B) 1890 { 1891 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1892 PetscErrorCode ierr; 1893 PetscInt *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j; 1894 PetscInt *jutmp,bs = A->rmap->bs,bs2=a->bs2; 1895 PetscInt m,reallocs = 0,*levtmp; 1896 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 1897 PetscInt incrlev,*lev,shift,prow,nz; 1898 PetscReal f = info->fill,levels = info->levels; 1899 PetscTruth perm_identity; 1900 1901 PetscFunctionBegin; 1902 /* check whether perm is the identity mapping */ 1903 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1904 1905 if (perm_identity){ 1906 a->permute = PETSC_FALSE; 1907 ai = a->i; aj = a->j; 1908 } else { /* non-trivial permutation */ 1909 a->permute = PETSC_TRUE; 1910 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1911 ai = a->inew; aj = a->jnew; 1912 } 1913 1914 /* initialization */ 1915 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1916 umax = (PetscInt)(f*ai[mbs] + 1); 1917 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 1918 umax += mbs + 1; 1919 shift = mbs + 1; 1920 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 1921 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 1922 iu[0] = mbs + 1; 1923 juidx = mbs + 1; 1924 /* prowl: linked list for pivot row */ 1925 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr); 1926 /* q: linked list for col index */ 1927 q = prowl + mbs; 1928 levtmp = q + mbs; 1929 1930 for (i=0; i<mbs; i++){ 1931 prowl[i] = mbs; 1932 q[i] = 0; 1933 } 1934 1935 /* for each row k */ 1936 for (k=0; k<mbs; k++){ 1937 nzk = 0; 1938 q[k] = mbs; 1939 /* copy current row into linked list */ 1940 nz = ai[rip[k]+1] - ai[rip[k]]; 1941 j = ai[rip[k]]; 1942 while (nz--){ 1943 vj = rip[aj[j++]]; 1944 if (vj > k){ 1945 qm = k; 1946 do { 1947 m = qm; qm = q[m]; 1948 } while(qm < vj); 1949 if (qm == vj) { 1950 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 1951 } 1952 nzk++; 1953 q[m] = vj; 1954 q[vj] = qm; 1955 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1956 } 1957 } 1958 1959 /* modify nonzero structure of k-th row by computing fill-in 1960 for each row prow to be merged in */ 1961 prow = k; 1962 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1963 1964 while (prow < k){ 1965 /* merge row prow into k-th row */ 1966 jmin = iu[prow] + 1; 1967 jmax = iu[prow+1]; 1968 qm = k; 1969 for (j=jmin; j<jmax; j++){ 1970 incrlev = lev[j-shift] + 1; 1971 if (incrlev > levels) continue; 1972 vj = ju[j]; 1973 do { 1974 m = qm; qm = q[m]; 1975 } while (qm < vj); 1976 if (qm != vj){ /* a fill */ 1977 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1978 levtmp[vj] = incrlev; 1979 } else { 1980 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1981 } 1982 } 1983 prow = prowl[prow]; /* next pivot row */ 1984 } 1985 1986 /* add k to row list for first nonzero element in k-th row */ 1987 if (nzk > 1){ 1988 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1989 prowl[k] = prowl[i]; prowl[i] = k; 1990 } 1991 iu[k+1] = iu[k] + nzk; 1992 1993 /* allocate more space to ju and lev if needed */ 1994 if (iu[k+1] > umax) { 1995 /* estimate how much additional space we will need */ 1996 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1997 /* just double the memory each time */ 1998 maxadd = umax; 1999 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 2000 umax += maxadd; 2001 2002 /* allocate a longer ju */ 2003 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2004 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 2005 ierr = PetscFree(ju);CHKERRQ(ierr); 2006 ju = jutmp; 2007 2008 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2009 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 2010 ierr = PetscFree(lev);CHKERRQ(ierr); 2011 lev = jutmp; 2012 reallocs += 2; /* count how many times we realloc */ 2013 } 2014 2015 /* save nonzero structure of k-th row in ju */ 2016 i=k; 2017 while (nzk --) { 2018 i = q[i]; 2019 ju[juidx] = i; 2020 lev[juidx-shift] = levtmp[i]; 2021 juidx++; 2022 } 2023 } 2024 2025 #if defined(PETSC_USE_INFO) 2026 if (ai[mbs] != 0) { 2027 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2028 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2029 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2030 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 2031 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2032 } else { 2033 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2034 } 2035 #endif 2036 2037 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2038 ierr = PetscFree(prowl);CHKERRQ(ierr); 2039 ierr = PetscFree(lev);CHKERRQ(ierr); 2040 2041 /* put together the new matrix */ 2042 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 2043 2044 /* ierr = PetscLogObjectParent(*B,iperm);CHKERRQ(ierr); */ 2045 b = (Mat_SeqSBAIJ*)(*B)->data; 2046 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2047 b->singlemalloc = PETSC_FALSE; 2048 b->free_a = PETSC_TRUE; 2049 b->free_ij = PETSC_TRUE; 2050 /* the next line frees the default space generated by the Create() */ 2051 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 2052 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 2053 b->j = ju; 2054 b->i = iu; 2055 b->diag = 0; 2056 b->ilen = 0; 2057 b->imax = 0; 2058 2059 if (b->row) { 2060 ierr = ISDestroy(b->row);CHKERRQ(ierr); 2061 } 2062 if (b->icol) { 2063 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 2064 } 2065 b->row = perm; 2066 b->icol = perm; 2067 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2068 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2069 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2070 /* In b structure: Free imax, ilen, old a, old j. 2071 Allocate idnew, solve_work, new a, new j */ 2072 ierr = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2073 b->maxnz = b->nz = iu[mbs]; 2074 2075 (*B)->factor = MAT_FACTOR_CHOLESKY; 2076 (*B)->info.factor_mallocs = reallocs; 2077 (*B)->info.fill_ratio_given = f; 2078 if (ai[mbs] != 0) { 2079 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2080 } else { 2081 (*B)->info.fill_ratio_needed = 0.0; 2082 } 2083 ierr = MatSeqSBAIJSetNumericFactorization(*B,perm_identity);CHKERRQ(ierr); 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #include "petscbt.h" 2088 #include "src/mat/utils/freespace.h" 2089 #undef __FUNCT__ 2090 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 2091 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 2092 { 2093 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2094 Mat_SeqSBAIJ *b; 2095 PetscErrorCode ierr; 2096 PetscTruth perm_identity,free_ij = PETSC_TRUE,missing; 2097 PetscInt bs=A->rmap->bs,am=a->mbs,d; 2098 PetscInt reallocs=0,*rip,i,*ai,*aj,*ui; 2099 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2100 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 2101 PetscReal fill=info->fill,levels=info->levels,ratio_needed; 2102 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2103 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2104 PetscBT lnkbt; 2105 2106 PetscFunctionBegin; 2107 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2108 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2109 2110 /* 2111 This code originally uses Modified Sparse Row (MSR) storage 2112 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 2113 Then it is rewritten so the factor B takes seqsbaij format. However the associated 2114 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 2115 thus the original code in MSR format is still used for these cases. 2116 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 2117 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 2118 */ 2119 if (bs > 1){ 2120 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr); 2121 PetscFunctionReturn(0); 2122 } 2123 2124 /* check whether perm is the identity mapping */ 2125 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2126 2127 /* special case that simply copies fill pattern */ 2128 if (!levels && perm_identity) { 2129 a->permute = PETSC_FALSE; 2130 /* reuse the column pointers and row offsets for memory savings */ 2131 ui = a->i; 2132 uj = a->j; 2133 free_ij = PETSC_FALSE; 2134 ratio_needed = 1.0; 2135 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2136 if (perm_identity){ 2137 a->permute = PETSC_FALSE; 2138 ai = a->i; aj = a->j; 2139 } else { /* non-trivial permutation */ 2140 a->permute = PETSC_TRUE; 2141 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2142 ai = a->inew; aj = a->jnew; 2143 } 2144 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2145 2146 /* initialization */ 2147 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2148 ui[0] = 0; 2149 2150 /* jl: linked list for storing indices of the pivot rows 2151 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2152 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 2153 il = jl + am; 2154 uj_ptr = (PetscInt**)(il + am); 2155 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 2156 for (i=0; i<am; i++){ 2157 jl[i] = am; il[i] = 0; 2158 } 2159 2160 /* create and initialize a linked list for storing column indices of the active row k */ 2161 nlnk = am + 1; 2162 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2163 2164 /* initial FreeSpace size is fill*(ai[am]+1) */ 2165 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2166 current_space = free_space; 2167 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2168 current_space_lvl = free_space_lvl; 2169 2170 for (k=0; k<am; k++){ /* for each active row k */ 2171 /* initialize lnk by the column indices of row rip[k] */ 2172 nzk = 0; 2173 ncols = ai[rip[k]+1] - ai[rip[k]]; 2174 cols = aj+ai[rip[k]]; 2175 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2176 nzk += nlnk; 2177 2178 /* update lnk by computing fill-in for each pivot row to be merged in */ 2179 prow = jl[k]; /* 1st pivot row */ 2180 2181 while (prow < k){ 2182 nextprow = jl[prow]; 2183 2184 /* merge prow into k-th row */ 2185 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2186 jmax = ui[prow+1]; 2187 ncols = jmax-jmin; 2188 i = jmin - ui[prow]; 2189 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2190 j = *(uj_lvl_ptr[prow] + i - 1); 2191 cols_lvl = uj_lvl_ptr[prow]+i; 2192 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2193 nzk += nlnk; 2194 2195 /* update il and jl for prow */ 2196 if (jmin < jmax){ 2197 il[prow] = jmin; 2198 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2199 } 2200 prow = nextprow; 2201 } 2202 2203 /* if free space is not available, make more free space */ 2204 if (current_space->local_remaining<nzk) { 2205 i = am - k + 1; /* num of unfactored rows */ 2206 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2207 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2208 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2209 reallocs++; 2210 } 2211 2212 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2213 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2214 2215 /* add the k-th row into il and jl */ 2216 if (nzk-1 > 0){ 2217 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2218 jl[k] = jl[i]; jl[i] = k; 2219 il[k] = ui[k] + 1; 2220 } 2221 uj_ptr[k] = current_space->array; 2222 uj_lvl_ptr[k] = current_space_lvl->array; 2223 2224 current_space->array += nzk; 2225 current_space->local_used += nzk; 2226 current_space->local_remaining -= nzk; 2227 current_space_lvl->array += nzk; 2228 current_space_lvl->local_used += nzk; 2229 current_space_lvl->local_remaining -= nzk; 2230 2231 ui[k+1] = ui[k] + nzk; 2232 } 2233 2234 #if defined(PETSC_USE_INFO) 2235 if (ai[am] != 0) { 2236 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2237 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2238 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2239 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2240 } else { 2241 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2242 } 2243 #endif 2244 2245 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2246 ierr = PetscFree(jl);CHKERRQ(ierr); 2247 2248 /* destroy list of free space and other temporary array(s) */ 2249 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2250 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2251 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2252 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2253 if (ai[am] != 0) { 2254 ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2255 } else { 2256 ratio_needed = 0.0; 2257 } 2258 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2259 2260 /* put together the new matrix in MATSEQSBAIJ format */ 2261 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2262 2263 b = (Mat_SeqSBAIJ*)(*fact)->data; 2264 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2265 b->singlemalloc = PETSC_FALSE; 2266 b->free_a = PETSC_TRUE; 2267 b->free_ij = free_ij; 2268 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2269 b->j = uj; 2270 b->i = ui; 2271 b->diag = 0; 2272 b->ilen = 0; 2273 b->imax = 0; 2274 b->row = perm; 2275 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2276 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2277 b->icol = perm; 2278 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2279 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2280 b->maxnz = b->nz = ui[am]; 2281 2282 (*fact)->factor = MAT_FACTOR_CHOLESKY; 2283 (*fact)->info.factor_mallocs = reallocs; 2284 (*fact)->info.fill_ratio_given = fill; 2285 (*fact)->info.fill_ratio_needed = ratio_needed; 2286 ierr = MatSeqSBAIJSetNumericFactorization(*fact,perm_identity);CHKERRQ(ierr); 2287 PetscFunctionReturn(0); 2288 } 2289 2290