1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for SBAIJ format. 5 */ 6 7 #include "../src/mat/impls/sbaij/seq/sbaij.h" 8 #include "../src/mat/impls/baij/seq/baij.h" 9 #include "../src/inline/ilu.h" 10 #include "../src/inline/dot.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 14 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 15 { 16 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 17 IS isrow=a->row; 18 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 19 PetscErrorCode ierr; 20 const PetscInt *r; 21 PetscInt nz,*vj,k,idx,k1; 22 PetscInt bs=A->rmap->bs,bs2 = a->bs2; 23 MatScalar *aa=a->a,*v,*diag; 24 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 25 26 PetscFunctionBegin; 27 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 28 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 29 t = a->solve_work; 30 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 31 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 32 33 /* solve U^T * D * y = b by forward substitution */ 34 xk = t; 35 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 36 idx = bs*r[k]; 37 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 38 } 39 for (k=0; k<mbs; k++){ 40 v = aa + bs2*ai[k]; 41 xk = t + k*bs; /* Dk*xk = k-th block of x */ 42 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 43 nz = ai[k+1] - ai[k]; 44 vj = aj + ai[k]; 45 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 46 while (nz--) { 47 /* x(:) += U(k,:)^T*(Dk*xk) */ 48 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 49 vj++; xj = t + (*vj)*bs; 50 v += bs2; 51 } 52 /* xk = inv(Dk)*(Dk*xk) */ 53 diag = aa+k*bs2; /* ptr to inv(Dk) */ 54 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 55 } 56 57 /* solve U*x = y by back substitution */ 58 for (k=mbs-1; k>=0; k--){ 59 v = aa + bs2*ai[k]; 60 xk = t + k*bs; /* xk */ 61 nz = ai[k+1] - ai[k]; 62 vj = aj + ai[k]; 63 xj = t + (*vj)*bs; 64 while (nz--) { 65 /* xk += U(k,:)*x(:) */ 66 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 67 vj++; 68 v += bs2; xj = t + (*vj)*bs; 69 } 70 idx = bs*r[k]; 71 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 72 } 73 74 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 75 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 76 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 77 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 78 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N" 84 PetscErrorCode MatForwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 85 { 86 PetscFunctionBegin; 87 SETERRQ(1,"not implemented yet"); 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N" 93 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 94 { 95 PetscFunctionBegin; 96 SETERRQ(1,"not implemented yet"); 97 PetscFunctionReturn(0); 98 } 99 100 #undef __FUNCT__ 101 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 102 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 103 { 104 PetscErrorCode ierr; 105 PetscInt nz,*vj,k; 106 PetscInt bs2 = bs*bs; 107 MatScalar *v,*diag; 108 PetscScalar *xk,*xj,*xk_tmp; 109 110 PetscFunctionBegin; 111 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 112 for (k=0; k<mbs; k++){ 113 v = aa + bs2*ai[k]; 114 xk = x + k*bs; /* Dk*xk = k-th block of x */ 115 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 116 nz = ai[k+1] - ai[k]; 117 vj = aj + ai[k]; 118 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 119 while (nz--) { 120 /* x(:) += U(k,:)^T*(Dk*xk) */ 121 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 122 vj++; xj = x + (*vj)*bs; 123 v += bs2; 124 } 125 /* xk = inv(Dk)*(Dk*xk) */ 126 diag = aa+k*bs2; /* ptr to inv(Dk) */ 127 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 128 } 129 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 135 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 136 { 137 PetscInt nz,*vj,k; 138 PetscInt bs2 = bs*bs; 139 MatScalar *v; 140 PetscScalar *xk,*xj; 141 142 PetscFunctionBegin; 143 for (k=mbs-1; k>=0; k--){ 144 v = aa + bs2*ai[k]; 145 xk = x + k*bs; /* xk */ 146 nz = ai[k+1] - ai[k]; 147 vj = aj + ai[k]; 148 xj = x + (*vj)*bs; 149 while (nz--) { 150 /* xk += U(k,:)*x(:) */ 151 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 152 vj++; 153 v += bs2; xj = x + (*vj)*bs; 154 } 155 } 156 PetscFunctionReturn(0); 157 } 158 159 #undef __FUNCT__ 160 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 161 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 162 { 163 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 164 PetscErrorCode ierr; 165 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 166 PetscInt bs=A->rmap->bs; 167 MatScalar *aa=a->a; 168 PetscScalar *x,*b; 169 #if defined(PETSC_USE_LOG) 170 PetscInt bs2 = a->bs2; 171 #endif 172 173 PetscFunctionBegin; 174 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 175 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 176 177 /* solve U^T * D * y = b by forward substitution */ 178 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 179 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 180 181 /* solve U*x = y by back substitution */ 182 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 183 184 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 185 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 186 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering" 192 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 193 { 194 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 195 PetscErrorCode ierr; 196 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 197 PetscInt bs=A->rmap->bs; 198 MatScalar *aa=a->a; 199 PetscScalar *x,*b; 200 #if defined(PETSC_USE_LOG) 201 PetscInt bs2 = a->bs2; 202 #endif 203 204 PetscFunctionBegin; 205 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 206 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 207 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 208 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 209 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 210 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 211 ierr = PetscLogFlops(bs2*a->nz + A->rmap->N);CHKERRQ(ierr); 212 PetscFunctionReturn(0); 213 } 214 215 #undef __FUNCT__ 216 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering" 217 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 218 { 219 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 220 PetscErrorCode ierr; 221 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 222 PetscInt bs=A->rmap->bs; 223 MatScalar *aa=a->a; 224 PetscScalar *x,*b; 225 #if defined(PETSC_USE_LOG) 226 PetscInt bs2 = a->bs2; 227 #endif 228 229 PetscFunctionBegin; 230 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 231 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 232 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 233 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 234 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 235 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 236 ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr); 237 PetscFunctionReturn(0); 238 } 239 240 #undef __FUNCT__ 241 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 242 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 243 { 244 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 245 IS isrow=a->row; 246 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 247 PetscErrorCode ierr; 248 const PetscInt *r; 249 PetscInt nz,*vj,k,idx; 250 MatScalar *aa=a->a,*v,*d; 251 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 252 253 PetscFunctionBegin; 254 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 255 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 256 t = a->solve_work; 257 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 258 259 /* solve U^T * D * y = b by forward substitution */ 260 tp = t; 261 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 262 idx = 7*r[k]; 263 tp[0] = b[idx]; 264 tp[1] = b[idx+1]; 265 tp[2] = b[idx+2]; 266 tp[3] = b[idx+3]; 267 tp[4] = b[idx+4]; 268 tp[5] = b[idx+5]; 269 tp[6] = b[idx+6]; 270 tp += 7; 271 } 272 273 for (k=0; k<mbs; k++){ 274 v = aa + 49*ai[k]; 275 vj = aj + ai[k]; 276 tp = t + k*7; 277 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 278 nz = ai[k+1] - ai[k]; 279 tp = t + (*vj)*7; 280 while (nz--) { 281 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 282 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 283 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 284 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 285 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 286 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 287 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 288 vj++; tp = t + (*vj)*7; 289 v += 49; 290 } 291 292 /* xk = inv(Dk)*(Dk*xk) */ 293 d = aa+k*49; /* ptr to inv(Dk) */ 294 tp = t + k*7; 295 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 296 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 297 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 298 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 299 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 300 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 301 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 302 } 303 304 /* solve U*x = y by back substitution */ 305 for (k=mbs-1; k>=0; k--){ 306 v = aa + 49*ai[k]; 307 vj = aj + ai[k]; 308 tp = t + k*7; 309 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 310 nz = ai[k+1] - ai[k]; 311 312 tp = t + (*vj)*7; 313 while (nz--) { 314 /* xk += U(k,:)*x(:) */ 315 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 316 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 317 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 318 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 319 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 320 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 321 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 322 vj++; tp = t + (*vj)*7; 323 v += 49; 324 } 325 tp = t + k*7; 326 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 327 idx = 7*r[k]; 328 x[idx] = x0; 329 x[idx+1] = x1; 330 x[idx+2] = x2; 331 x[idx+3] = x3; 332 x[idx+4] = x4; 333 x[idx+5] = x5; 334 x[idx+6] = x6; 335 } 336 337 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 338 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 339 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 340 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 346 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 347 { 348 MatScalar *v,*d; 349 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 350 PetscInt nz,*vj,k; 351 352 PetscFunctionBegin; 353 for (k=0; k<mbs; k++){ 354 v = aa + 49*ai[k]; 355 xp = x + k*7; 356 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 357 nz = ai[k+1] - ai[k]; 358 vj = aj + ai[k]; 359 xp = x + (*vj)*7; 360 while (nz--) { 361 /* x(:) += U(k,:)^T*(Dk*xk) */ 362 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 363 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 364 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 365 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 366 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 367 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 368 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 369 vj++; xp = x + (*vj)*7; 370 v += 49; 371 } 372 /* xk = inv(Dk)*(Dk*xk) */ 373 d = aa+k*49; /* ptr to inv(Dk) */ 374 xp = x + k*7; 375 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 376 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 377 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 378 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 379 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 380 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 381 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 382 } 383 PetscFunctionReturn(0); 384 } 385 386 #undef __FUNCT__ 387 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 388 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 389 { 390 MatScalar *v; 391 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 392 PetscInt nz,*vj,k; 393 394 PetscFunctionBegin; 395 for (k=mbs-1; k>=0; k--){ 396 v = aa + 49*ai[k]; 397 xp = x + k*7; 398 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 399 nz = ai[k+1] - ai[k]; 400 vj = aj + ai[k]; 401 xp = x + (*vj)*7; 402 while (nz--) { 403 /* xk += U(k,:)*x(:) */ 404 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 405 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 406 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 407 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 408 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 409 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 410 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 411 vj++; 412 v += 49; xp = x + (*vj)*7; 413 } 414 xp = x + k*7; 415 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 416 } 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 422 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 423 { 424 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 425 PetscErrorCode ierr; 426 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 427 MatScalar *aa=a->a; 428 PetscScalar *x,*b; 429 430 PetscFunctionBegin; 431 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 432 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 433 434 /* solve U^T * D * y = b by forward substitution */ 435 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 436 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 437 438 /* solve U*x = y by back substitution */ 439 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 440 441 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 442 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 443 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 444 PetscFunctionReturn(0); 445 } 446 447 #undef __FUNCT__ 448 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering" 449 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 450 { 451 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 452 PetscErrorCode ierr; 453 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 454 MatScalar *aa=a->a; 455 PetscScalar *x,*b; 456 457 PetscFunctionBegin; 458 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 459 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 460 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 461 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 462 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 463 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 464 ierr = PetscLogFlops(49*a->nz + mbs);CHKERRQ(ierr); 465 PetscFunctionReturn(0); 466 } 467 468 #undef __FUNCT__ 469 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering" 470 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 471 { 472 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 473 PetscErrorCode ierr; 474 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 475 MatScalar *aa=a->a; 476 PetscScalar *x,*b; 477 478 PetscFunctionBegin; 479 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 480 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 481 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 482 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 483 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 484 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 485 ierr = PetscLogFlops(49*a->nz);CHKERRQ(ierr); 486 PetscFunctionReturn(0); 487 } 488 489 #undef __FUNCT__ 490 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 491 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 492 { 493 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 494 IS isrow=a->row; 495 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 496 PetscErrorCode ierr; 497 const PetscInt *r; 498 PetscInt nz,*vj,k,idx; 499 MatScalar *aa=a->a,*v,*d; 500 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 501 502 PetscFunctionBegin; 503 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 504 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 505 t = a->solve_work; 506 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 507 508 /* solve U^T * D * y = b by forward substitution */ 509 tp = t; 510 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 511 idx = 6*r[k]; 512 tp[0] = b[idx]; 513 tp[1] = b[idx+1]; 514 tp[2] = b[idx+2]; 515 tp[3] = b[idx+3]; 516 tp[4] = b[idx+4]; 517 tp[5] = b[idx+5]; 518 tp += 6; 519 } 520 521 for (k=0; k<mbs; k++){ 522 v = aa + 36*ai[k]; 523 vj = aj + ai[k]; 524 tp = t + k*6; 525 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 526 nz = ai[k+1] - ai[k]; 527 tp = t + (*vj)*6; 528 while (nz--) { 529 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 530 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 531 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 532 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 533 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 534 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 535 vj++; tp = t + (*vj)*6; 536 v += 36; 537 } 538 539 /* xk = inv(Dk)*(Dk*xk) */ 540 d = aa+k*36; /* ptr to inv(Dk) */ 541 tp = t + k*6; 542 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 543 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 544 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 545 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 546 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 547 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 548 } 549 550 /* solve U*x = y by back substitution */ 551 for (k=mbs-1; k>=0; k--){ 552 v = aa + 36*ai[k]; 553 vj = aj + ai[k]; 554 tp = t + k*6; 555 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 556 nz = ai[k+1] - ai[k]; 557 558 tp = t + (*vj)*6; 559 while (nz--) { 560 /* xk += U(k,:)*x(:) */ 561 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 562 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 563 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 564 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 565 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 566 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 567 vj++; tp = t + (*vj)*6; 568 v += 36; 569 } 570 tp = t + k*6; 571 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 572 idx = 6*r[k]; 573 x[idx] = x0; 574 x[idx+1] = x1; 575 x[idx+2] = x2; 576 x[idx+3] = x3; 577 x[idx+4] = x4; 578 x[idx+5] = x5; 579 } 580 581 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 582 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 583 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 584 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNCT__ 589 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 590 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 591 { 592 MatScalar *v,*d; 593 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 594 PetscInt nz,*vj,k; 595 596 PetscFunctionBegin; 597 for (k=0; k<mbs; k++){ 598 v = aa + 36*ai[k]; 599 xp = x + k*6; 600 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 601 nz = ai[k+1] - ai[k]; 602 vj = aj + ai[k]; 603 xp = x + (*vj)*6; 604 while (nz--) { 605 /* x(:) += U(k,:)^T*(Dk*xk) */ 606 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 607 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 608 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 609 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 610 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 611 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 612 vj++; xp = x + (*vj)*6; 613 v += 36; 614 } 615 /* xk = inv(Dk)*(Dk*xk) */ 616 d = aa+k*36; /* ptr to inv(Dk) */ 617 xp = x + k*6; 618 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 619 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 620 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 621 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 622 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 623 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 624 } 625 PetscFunctionReturn(0); 626 } 627 #undef __FUNCT__ 628 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 629 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 630 { 631 MatScalar *v; 632 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 633 PetscInt nz,*vj,k; 634 635 PetscFunctionBegin; 636 for (k=mbs-1; k>=0; k--){ 637 v = aa + 36*ai[k]; 638 xp = x + k*6; 639 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 640 nz = ai[k+1] - ai[k]; 641 vj = aj + ai[k]; 642 xp = x + (*vj)*6; 643 while (nz--) { 644 /* xk += U(k,:)*x(:) */ 645 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 646 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 647 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 648 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 649 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 650 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 651 vj++; 652 v += 36; xp = x + (*vj)*6; 653 } 654 xp = x + k*6; 655 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 656 } 657 PetscFunctionReturn(0); 658 } 659 660 661 #undef __FUNCT__ 662 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 663 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 664 { 665 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 666 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 667 MatScalar *aa=a->a; 668 PetscScalar *x,*b; 669 PetscErrorCode ierr; 670 671 PetscFunctionBegin; 672 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 673 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 674 675 /* solve U^T * D * y = b by forward substitution */ 676 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 677 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 678 679 /* solve U*x = y by back substitution */ 680 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 681 682 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 683 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 684 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering" 690 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 691 { 692 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 693 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 694 MatScalar *aa=a->a; 695 PetscScalar *x,*b; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 700 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 701 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 702 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 703 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 704 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 705 ierr = PetscLogFlops(36*a->nz + mbs);CHKERRQ(ierr); 706 PetscFunctionReturn(0); 707 } 708 709 #undef __FUNCT__ 710 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering" 711 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 712 { 713 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 714 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 715 MatScalar *aa=a->a; 716 PetscScalar *x,*b; 717 PetscErrorCode ierr; 718 719 PetscFunctionBegin; 720 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 721 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 722 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 723 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 724 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 725 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 726 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 732 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 733 { 734 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 735 IS isrow=a->row; 736 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 737 PetscErrorCode ierr; 738 const PetscInt *r; 739 PetscInt nz,*vj,k,idx; 740 MatScalar *aa=a->a,*v,*diag; 741 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 742 743 PetscFunctionBegin; 744 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 745 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 746 t = a->solve_work; 747 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 748 749 /* solve U^T * D * y = b by forward substitution */ 750 tp = t; 751 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 752 idx = 5*r[k]; 753 tp[0] = b[idx]; 754 tp[1] = b[idx+1]; 755 tp[2] = b[idx+2]; 756 tp[3] = b[idx+3]; 757 tp[4] = b[idx+4]; 758 tp += 5; 759 } 760 761 for (k=0; k<mbs; k++){ 762 v = aa + 25*ai[k]; 763 vj = aj + ai[k]; 764 tp = t + k*5; 765 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 766 nz = ai[k+1] - ai[k]; 767 768 tp = t + (*vj)*5; 769 while (nz--) { 770 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 771 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 772 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 773 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 774 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 775 vj++; tp = t + (*vj)*5; 776 v += 25; 777 } 778 779 /* xk = inv(Dk)*(Dk*xk) */ 780 diag = aa+k*25; /* ptr to inv(Dk) */ 781 tp = t + k*5; 782 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 783 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 784 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 785 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 786 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 787 } 788 789 /* solve U*x = y by back substitution */ 790 for (k=mbs-1; k>=0; k--){ 791 v = aa + 25*ai[k]; 792 vj = aj + ai[k]; 793 tp = t + k*5; 794 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 795 nz = ai[k+1] - ai[k]; 796 797 tp = t + (*vj)*5; 798 while (nz--) { 799 /* xk += U(k,:)*x(:) */ 800 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 801 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 802 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 803 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 804 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 805 vj++; tp = t + (*vj)*5; 806 v += 25; 807 } 808 tp = t + k*5; 809 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 810 idx = 5*r[k]; 811 x[idx] = x0; 812 x[idx+1] = x1; 813 x[idx+2] = x2; 814 x[idx+3] = x3; 815 x[idx+4] = x4; 816 } 817 818 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 819 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 820 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 821 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNCT__ 826 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 827 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 828 { 829 MatScalar *v,*diag; 830 PetscScalar *xp,x0,x1,x2,x3,x4; 831 PetscInt nz,*vj,k; 832 833 PetscFunctionBegin; 834 for (k=0; k<mbs; k++){ 835 v = aa + 25*ai[k]; 836 xp = x + k*5; 837 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 838 nz = ai[k+1] - ai[k]; 839 vj = aj + ai[k]; 840 xp = x + (*vj)*5; 841 while (nz--) { 842 /* x(:) += U(k,:)^T*(Dk*xk) */ 843 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 844 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 845 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 846 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 847 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 848 vj++; xp = x + (*vj)*5; 849 v += 25; 850 } 851 /* xk = inv(Dk)*(Dk*xk) */ 852 diag = aa+k*25; /* ptr to inv(Dk) */ 853 xp = x + k*5; 854 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 855 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 856 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 857 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 858 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 859 } 860 PetscFunctionReturn(0); 861 } 862 863 #undef __FUNCT__ 864 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 865 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 866 { 867 MatScalar *v; 868 PetscScalar *xp,x0,x1,x2,x3,x4; 869 PetscInt nz,*vj,k; 870 871 PetscFunctionBegin; 872 for (k=mbs-1; k>=0; k--){ 873 v = aa + 25*ai[k]; 874 xp = x + k*5; 875 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 876 nz = ai[k+1] - ai[k]; 877 vj = aj + ai[k]; 878 xp = x + (*vj)*5; 879 while (nz--) { 880 /* xk += U(k,:)*x(:) */ 881 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 882 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 883 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 884 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 885 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 886 vj++; 887 v += 25; xp = x + (*vj)*5; 888 } 889 xp = x + k*5; 890 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 891 } 892 PetscFunctionReturn(0); 893 } 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 897 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 898 { 899 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 900 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 901 MatScalar *aa=a->a; 902 PetscScalar *x,*b; 903 PetscErrorCode ierr; 904 905 PetscFunctionBegin; 906 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 907 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 908 909 /* solve U^T * D * y = b by forward substitution */ 910 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 911 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 912 913 /* solve U*x = y by back substitution */ 914 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 915 916 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 917 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 918 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering" 924 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 925 { 926 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 927 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 928 MatScalar *aa=a->a; 929 PetscScalar *x,*b; 930 PetscErrorCode ierr; 931 932 PetscFunctionBegin; 933 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 934 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 935 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 936 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 937 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 938 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 939 ierr = PetscLogFlops(25*(a->nz + mbs));CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 #undef __FUNCT__ 944 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering" 945 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 946 { 947 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 948 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 949 MatScalar *aa=a->a; 950 PetscScalar *x,*b; 951 PetscErrorCode ierr; 952 953 PetscFunctionBegin; 954 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 955 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 956 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 957 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 958 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 959 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 960 ierr = PetscLogFlops(25*a->nz);CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 966 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 967 { 968 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 969 IS isrow=a->row; 970 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 971 PetscErrorCode ierr; 972 const PetscInt *r; 973 PetscInt nz,*vj,k,idx; 974 MatScalar *aa=a->a,*v,*diag; 975 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 976 977 PetscFunctionBegin; 978 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 979 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 980 t = a->solve_work; 981 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 982 983 /* solve U^T * D * y = b by forward substitution */ 984 tp = t; 985 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 986 idx = 4*r[k]; 987 tp[0] = b[idx]; 988 tp[1] = b[idx+1]; 989 tp[2] = b[idx+2]; 990 tp[3] = b[idx+3]; 991 tp += 4; 992 } 993 994 for (k=0; k<mbs; k++){ 995 v = aa + 16*ai[k]; 996 vj = aj + ai[k]; 997 tp = t + k*4; 998 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 999 nz = ai[k+1] - ai[k]; 1000 1001 tp = t + (*vj)*4; 1002 while (nz--) { 1003 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1004 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1005 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1006 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1007 vj++; tp = t + (*vj)*4; 1008 v += 16; 1009 } 1010 1011 /* xk = inv(Dk)*(Dk*xk) */ 1012 diag = aa+k*16; /* ptr to inv(Dk) */ 1013 tp = t + k*4; 1014 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1015 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1016 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1017 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1018 } 1019 1020 /* solve U*x = y by back substitution */ 1021 for (k=mbs-1; k>=0; k--){ 1022 v = aa + 16*ai[k]; 1023 vj = aj + ai[k]; 1024 tp = t + k*4; 1025 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 1026 nz = ai[k+1] - ai[k]; 1027 1028 tp = t + (*vj)*4; 1029 while (nz--) { 1030 /* xk += U(k,:)*x(:) */ 1031 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 1032 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 1033 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 1034 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 1035 vj++; tp = t + (*vj)*4; 1036 v += 16; 1037 } 1038 tp = t + k*4; 1039 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 1040 idx = 4*r[k]; 1041 x[idx] = x0; 1042 x[idx+1] = x1; 1043 x[idx+2] = x2; 1044 x[idx+3] = x3; 1045 } 1046 1047 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1048 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1049 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1050 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1056 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1057 { 1058 MatScalar *v,*diag; 1059 PetscScalar *xp,x0,x1,x2,x3; 1060 PetscInt nz,*vj,k; 1061 1062 PetscFunctionBegin; 1063 for (k=0; k<mbs; k++){ 1064 v = aa + 16*ai[k]; 1065 xp = x + k*4; 1066 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 1067 nz = ai[k+1] - ai[k]; 1068 vj = aj + ai[k]; 1069 xp = x + (*vj)*4; 1070 while (nz--) { 1071 /* x(:) += U(k,:)^T*(Dk*xk) */ 1072 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1073 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1074 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1075 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1076 vj++; xp = x + (*vj)*4; 1077 v += 16; 1078 } 1079 /* xk = inv(Dk)*(Dk*xk) */ 1080 diag = aa+k*16; /* ptr to inv(Dk) */ 1081 xp = x + k*4; 1082 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1083 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1084 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1085 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1086 } 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1092 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1093 { 1094 MatScalar *v; 1095 PetscScalar *xp,x0,x1,x2,x3; 1096 PetscInt nz,*vj,k; 1097 1098 PetscFunctionBegin; 1099 for (k=mbs-1; k>=0; k--){ 1100 v = aa + 16*ai[k]; 1101 xp = x + k*4; 1102 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 1103 nz = ai[k+1] - ai[k]; 1104 vj = aj + ai[k]; 1105 xp = x + (*vj)*4; 1106 while (nz--) { 1107 /* xk += U(k,:)*x(:) */ 1108 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 1109 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 1110 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 1111 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 1112 vj++; 1113 v += 16; xp = x + (*vj)*4; 1114 } 1115 xp = x + k*4; 1116 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 1117 } 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 1123 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1124 { 1125 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1126 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1127 MatScalar *aa=a->a; 1128 PetscScalar *x,*b; 1129 PetscErrorCode ierr; 1130 1131 PetscFunctionBegin; 1132 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1133 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1134 1135 /* solve U^T * D * y = b by forward substitution */ 1136 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1137 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1138 1139 /* solve U*x = y by back substitution */ 1140 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1141 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1142 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1143 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering" 1149 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1150 { 1151 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1152 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1153 MatScalar *aa=a->a; 1154 PetscScalar *x,*b; 1155 PetscErrorCode ierr; 1156 1157 PetscFunctionBegin; 1158 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1159 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1160 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1161 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1162 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1163 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1164 ierr = PetscLogFlops(16*a->nz + mbs);CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNCT__ 1169 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering" 1170 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1171 { 1172 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1173 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1174 MatScalar *aa=a->a; 1175 PetscScalar *x,*b; 1176 PetscErrorCode ierr; 1177 1178 PetscFunctionBegin; 1179 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1180 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1181 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1182 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1183 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1184 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1185 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 1191 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 1192 { 1193 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1194 IS isrow=a->row; 1195 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1196 PetscErrorCode ierr; 1197 const PetscInt *r; 1198 PetscInt nz,*vj,k,idx; 1199 MatScalar *aa=a->a,*v,*diag; 1200 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 1201 1202 PetscFunctionBegin; 1203 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1204 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1205 t = a->solve_work; 1206 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1207 1208 /* solve U^T * D * y = b by forward substitution */ 1209 tp = t; 1210 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1211 idx = 3*r[k]; 1212 tp[0] = b[idx]; 1213 tp[1] = b[idx+1]; 1214 tp[2] = b[idx+2]; 1215 tp += 3; 1216 } 1217 1218 for (k=0; k<mbs; k++){ 1219 v = aa + 9*ai[k]; 1220 vj = aj + ai[k]; 1221 tp = t + k*3; 1222 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 1223 nz = ai[k+1] - ai[k]; 1224 1225 tp = t + (*vj)*3; 1226 while (nz--) { 1227 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1228 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1229 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1230 vj++; tp = t + (*vj)*3; 1231 v += 9; 1232 } 1233 1234 /* xk = inv(Dk)*(Dk*xk) */ 1235 diag = aa+k*9; /* ptr to inv(Dk) */ 1236 tp = t + k*3; 1237 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1238 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1239 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1240 } 1241 1242 /* solve U*x = y by back substitution */ 1243 for (k=mbs-1; k>=0; k--){ 1244 v = aa + 9*ai[k]; 1245 vj = aj + ai[k]; 1246 tp = t + k*3; 1247 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 1248 nz = ai[k+1] - ai[k]; 1249 1250 tp = t + (*vj)*3; 1251 while (nz--) { 1252 /* xk += U(k,:)*x(:) */ 1253 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 1254 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 1255 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 1256 vj++; tp = t + (*vj)*3; 1257 v += 9; 1258 } 1259 tp = t + k*3; 1260 tp[0] = x0; tp[1] = x1; tp[2] = x2; 1261 idx = 3*r[k]; 1262 x[idx] = x0; 1263 x[idx+1] = x1; 1264 x[idx+2] = x2; 1265 } 1266 1267 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1268 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1269 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1270 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 1271 PetscFunctionReturn(0); 1272 } 1273 1274 #undef __FUNCT__ 1275 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1276 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1277 { 1278 MatScalar *v,*diag; 1279 PetscScalar *xp,x0,x1,x2; 1280 PetscInt nz,*vj,k; 1281 1282 PetscFunctionBegin; 1283 for (k=0; k<mbs; k++){ 1284 v = aa + 9*ai[k]; 1285 xp = x + k*3; 1286 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1287 nz = ai[k+1] - ai[k]; 1288 vj = aj + ai[k]; 1289 xp = x + (*vj)*3; 1290 while (nz--) { 1291 /* x(:) += U(k,:)^T*(Dk*xk) */ 1292 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1293 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1294 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1295 vj++; xp = x + (*vj)*3; 1296 v += 9; 1297 } 1298 /* xk = inv(Dk)*(Dk*xk) */ 1299 diag = aa+k*9; /* ptr to inv(Dk) */ 1300 xp = x + k*3; 1301 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1302 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1303 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1304 } 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1310 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1311 { 1312 MatScalar *v; 1313 PetscScalar *xp,x0,x1,x2; 1314 PetscInt nz,*vj,k; 1315 1316 PetscFunctionBegin; 1317 for (k=mbs-1; k>=0; k--){ 1318 v = aa + 9*ai[k]; 1319 xp = x + k*3; 1320 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1321 nz = ai[k+1] - ai[k]; 1322 vj = aj + ai[k]; 1323 xp = x + (*vj)*3; 1324 while (nz--) { 1325 /* xk += U(k,:)*x(:) */ 1326 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1327 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1328 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1329 vj++; 1330 v += 9; xp = x + (*vj)*3; 1331 } 1332 xp = x + k*3; 1333 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1334 } 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 1340 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1341 { 1342 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1343 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1344 MatScalar *aa=a->a; 1345 PetscScalar *x,*b; 1346 PetscErrorCode ierr; 1347 1348 PetscFunctionBegin; 1349 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1351 1352 /* solve U^T * D * y = b by forward substitution */ 1353 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1354 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1355 1356 /* solve U*x = y by back substitution */ 1357 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1358 1359 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1360 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1361 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering" 1367 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1368 { 1369 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1370 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1371 MatScalar *aa=a->a; 1372 PetscScalar *x,*b; 1373 PetscErrorCode ierr; 1374 1375 PetscFunctionBegin; 1376 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1377 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1378 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1379 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1380 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1381 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1382 ierr = PetscLogFlops(9*(a->nz + mbs));CHKERRQ(ierr); 1383 PetscFunctionReturn(0); 1384 } 1385 1386 #undef __FUNCT__ 1387 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering" 1388 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1389 { 1390 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1391 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1392 MatScalar *aa=a->a; 1393 PetscScalar *x,*b; 1394 PetscErrorCode ierr; 1395 1396 PetscFunctionBegin; 1397 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1398 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1399 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1400 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1401 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1402 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1403 ierr = PetscLogFlops(9*a->nz);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 1409 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 1410 { 1411 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1412 IS isrow=a->row; 1413 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1414 PetscErrorCode ierr; 1415 const PetscInt *r; 1416 PetscInt nz,*vj,k,k2,idx; 1417 MatScalar *aa=a->a,*v,*diag; 1418 PetscScalar *x,*b,x0,x1,*t; 1419 1420 PetscFunctionBegin; 1421 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1422 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1423 t = a->solve_work; 1424 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1425 1426 /* solve U^T * D * y = perm(b) by forward substitution */ 1427 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1428 idx = 2*r[k]; 1429 t[k*2] = b[idx]; 1430 t[k*2+1] = b[idx+1]; 1431 } 1432 for (k=0; k<mbs; k++){ 1433 v = aa + 4*ai[k]; 1434 vj = aj + ai[k]; 1435 k2 = k*2; 1436 x0 = t[k2]; x1 = t[k2+1]; 1437 nz = ai[k+1] - ai[k]; 1438 while (nz--) { 1439 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1440 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1441 vj++; v += 4; 1442 } 1443 diag = aa+k*4; /* ptr to inv(Dk) */ 1444 t[k2] = diag[0]*x0 + diag[2]*x1; 1445 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1446 } 1447 1448 /* solve U*x = y by back substitution */ 1449 for (k=mbs-1; k>=0; k--){ 1450 v = aa + 4*ai[k]; 1451 vj = aj + ai[k]; 1452 k2 = k*2; 1453 x0 = t[k2]; x1 = t[k2+1]; 1454 nz = ai[k+1] - ai[k]; 1455 while (nz--) { 1456 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1457 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1458 vj++; v += 4; 1459 } 1460 t[k2] = x0; 1461 t[k2+1] = x1; 1462 idx = 2*r[k]; 1463 x[idx] = x0; 1464 x[idx+1] = x1; 1465 } 1466 1467 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1468 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1469 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1470 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1476 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1477 { 1478 MatScalar *v,*diag; 1479 PetscScalar x0,x1; 1480 PetscInt nz,*vj,k,k2; 1481 1482 PetscFunctionBegin; 1483 for (k=0; k<mbs; k++){ 1484 v = aa + 4*ai[k]; 1485 vj = aj + ai[k]; 1486 k2 = k*2; 1487 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1488 nz = ai[k+1] - ai[k]; 1489 1490 while (nz--) { 1491 /* x(:) += U(k,:)^T*(Dk*xk) */ 1492 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1493 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1494 vj++; v += 4; 1495 } 1496 /* xk = inv(Dk)*(Dk*xk) */ 1497 diag = aa+k*4; /* ptr to inv(Dk) */ 1498 x[k2] = diag[0]*x0 + diag[2]*x1; 1499 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1506 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1507 { 1508 MatScalar *v; 1509 PetscScalar x0,x1; 1510 PetscInt nz,*vj,k,k2; 1511 1512 PetscFunctionBegin; 1513 for (k=mbs-1; k>=0; k--){ 1514 v = aa + 4*ai[k]; 1515 vj = aj + ai[k]; 1516 k2 = k*2; 1517 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1518 nz = ai[k+1] - ai[k]; 1519 while (nz--) { 1520 /* xk += U(k,:)*x(:) */ 1521 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1522 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1523 vj++; v += 4; 1524 } 1525 x[k2] = x0; 1526 x[k2+1] = x1; 1527 } 1528 PetscFunctionReturn(0); 1529 } 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1533 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1534 { 1535 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1536 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1537 MatScalar *aa=a->a; 1538 PetscScalar *x,*b; 1539 PetscErrorCode ierr; 1540 1541 PetscFunctionBegin; 1542 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1543 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1544 1545 /* solve U^T * D * y = b by forward substitution */ 1546 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1547 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1548 1549 /* solve U*x = y by back substitution */ 1550 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1551 1552 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1553 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1554 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */ 1555 PetscFunctionReturn(0); 1556 } 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering" 1560 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1561 { 1562 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1563 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1564 MatScalar *aa=a->a; 1565 PetscScalar *x,*b; 1566 PetscErrorCode ierr; 1567 1568 PetscFunctionBegin; 1569 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1570 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1571 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1572 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1573 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1574 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1575 ierr = PetscLogFlops(4*(a->nz + mbs));CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering" 1581 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1582 { 1583 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1584 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1585 MatScalar *aa=a->a; 1586 PetscScalar *x,*b; 1587 PetscErrorCode ierr; 1588 1589 PetscFunctionBegin; 1590 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1591 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1592 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1593 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1594 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1595 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1596 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1602 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1603 { 1604 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1605 IS isrow=a->row; 1606 PetscErrorCode ierr; 1607 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1608 const MatScalar *aa=a->a,*v; 1609 PetscScalar *x,*b,xk,*t; 1610 PetscInt nz,k; 1611 1612 PetscFunctionBegin; 1613 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1614 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1615 t = a->solve_work; 1616 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1617 1618 /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */ 1619 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1620 for (k=0; k<mbs; k++){ 1621 v = aa + ai[k] + 1; 1622 vj = aj + ai[k] + 1; 1623 xk = t[k]; 1624 nz = ai[k+1] - ai[k] - 1; 1625 while (nz--) t[*vj++] += (*v++) * xk; 1626 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1627 } 1628 1629 /* solve U*perm(x) = y by back substitution */ 1630 for (k=mbs-1; k>=0; k--){ 1631 v = aa + ai[k] + 1; 1632 vj = aj + ai[k] + 1; 1633 nz = ai[k+1] - ai[k] - 1; 1634 while (nz--) t[k] += (*v++) * t[*vj++]; 1635 x[rp[k]] = t[k]; 1636 } 1637 1638 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1639 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1640 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1641 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNCT__ 1646 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1" 1647 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1648 { 1649 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1650 IS isrow=a->row; 1651 PetscErrorCode ierr; 1652 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1653 const MatScalar *aa=a->a,*v; 1654 PetscReal diagk; 1655 PetscScalar *x,*b,xk; 1656 PetscInt nz,k; 1657 1658 PetscFunctionBegin; 1659 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ 1660 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1661 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1662 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1663 1664 for (k=0; k<mbs; k++) x[k] = b[rp[k]]; 1665 for (k=0; k<mbs; k++){ 1666 v = aa + ai[k] + 1; 1667 vj = aj + ai[k] + 1; 1668 xk = x[k]; 1669 nz = ai[k+1] - ai[k] - 1; 1670 while (nz--) x[*vj++] += (*v++) * xk; 1671 1672 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1673 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1674 x[k] = xk*sqrt(diagk); 1675 } 1676 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1677 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1678 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1679 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1680 PetscFunctionReturn(0); 1681 } 1682 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1" 1685 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1686 { 1687 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1688 IS isrow=a->row; 1689 PetscErrorCode ierr; 1690 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1691 const MatScalar *aa=a->a,*v; 1692 PetscReal diagk; 1693 PetscScalar *x,*b,*t; 1694 PetscInt nz,k; 1695 1696 PetscFunctionBegin; 1697 /* solve D^(1/2)*U*perm(x) = b by back substitution */ 1698 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1699 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1700 t = a->solve_work; 1701 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1702 1703 for (k=mbs-1; k>=0; k--){ 1704 v = aa + ai[k] + 1; 1705 vj = aj + ai[k] + 1; 1706 diagk = PetscRealPart(aa[ai[k]]); 1707 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1708 t[k] = b[k] * sqrt(diagk); 1709 nz = ai[k+1] - ai[k] - 1; 1710 while (nz--) t[k] += (*v++) * t[*vj++]; 1711 x[rp[k]] = t[k]; 1712 } 1713 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1714 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1715 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1716 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1722 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1723 { 1724 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1725 PetscErrorCode ierr; 1726 1727 PetscFunctionBegin; 1728 if (A->rmap->bs == 1) { 1729 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1730 } else { 1731 IS isrow=a->row; 1732 const PetscInt *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1733 const MatScalar *aa=a->a,*v; 1734 PetscScalar *x,*b,*t; 1735 PetscInt nz,k,n,i; 1736 if (bb->n > a->solves_work_n) { 1737 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1738 ierr = PetscMalloc(bb->n*A->rmap->N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1739 a->solves_work_n = bb->n; 1740 } 1741 n = bb->n; 1742 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1743 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1744 t = a->solves_work; 1745 1746 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1747 1748 /* solve U^T*D*y = perm(b) by forward substitution */ 1749 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */ 1750 for (k=0; k<mbs; k++){ 1751 v = aa + ai[k]; 1752 vj = aj + ai[k]; 1753 nz = ai[k+1] - ai[k]; 1754 while (nz--) { 1755 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1756 v++;vj++; 1757 } 1758 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1759 } 1760 1761 /* solve U*perm(x) = y by back substitution */ 1762 for (k=mbs-1; k>=0; k--){ 1763 v = aa + ai[k]; 1764 vj = aj + ai[k]; 1765 nz = ai[k+1] - ai[k]; 1766 while (nz--) { 1767 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1768 v++;vj++; 1769 } 1770 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i]; 1771 } 1772 1773 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1774 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1775 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1776 ierr = PetscLogFlops(bb->n*(4*a->nz + A->rmap->N));CHKERRQ(ierr); 1777 } 1778 PetscFunctionReturn(0); 1779 } 1780 1781 /* 1782 Special case where the matrix was ILU(0) factored in the natural 1783 ordering. This eliminates the need for the column and row permutation. 1784 */ 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1787 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1788 { 1789 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1790 PetscErrorCode ierr; 1791 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1792 MatScalar *aa=a->a,*v; 1793 PetscScalar *x,*b,xk; 1794 PetscInt nz,*vj,k; 1795 1796 PetscFunctionBegin; 1797 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1798 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1799 1800 /* solve U^T*D*y = b by forward substitution */ 1801 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1802 for (k=0; k<mbs; k++){ 1803 v = aa + ai[k] + 1; 1804 vj = aj + ai[k] + 1; 1805 xk = x[k]; 1806 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1807 while (nz--) x[*vj++] += (*v++) * xk; 1808 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1809 } 1810 1811 /* solve U*x = y by back substitution */ 1812 for (k=mbs-2; k>=0; k--){ 1813 v = aa + ai[k] + 1; 1814 vj = aj + ai[k] + 1; 1815 xk = x[k]; 1816 nz = ai[k+1] - ai[k] - 1; 1817 while (nz--) xk += (*v++) * x[*vj++]; 1818 x[k] = xk; 1819 } 1820 1821 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1822 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1823 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 1824 PetscFunctionReturn(0); 1825 } 1826 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering" 1829 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1830 { 1831 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1832 PetscErrorCode ierr; 1833 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1834 MatScalar *aa=a->a,*v; 1835 PetscReal diagk; 1836 PetscScalar *x,*b; 1837 PetscInt nz,*vj,k; 1838 1839 PetscFunctionBegin; 1840 /* solve U^T*D^(1/2)*x = b by forward substitution */ 1841 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1842 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1843 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1844 for (k=0; k<mbs; k++){ 1845 v = aa + ai[k] + 1; 1846 vj = aj + ai[k] + 1; 1847 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1848 while (nz--) x[*vj++] += (*v++) * x[k]; 1849 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1850 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]])); 1851 x[k] *= sqrt(diagk); 1852 } 1853 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1854 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1855 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering" 1861 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1862 { 1863 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1864 PetscErrorCode ierr; 1865 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1866 MatScalar *aa=a->a,*v; 1867 PetscReal diagk; 1868 PetscScalar *x,*b; 1869 PetscInt nz,*vj,k; 1870 1871 PetscFunctionBegin; 1872 /* solve D^(1/2)*U*x = b by back substitution */ 1873 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1874 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1875 1876 for (k=mbs-1; k>=0; k--){ 1877 v = aa + ai[k] + 1; 1878 vj = aj + ai[k] + 1; 1879 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1880 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1881 x[k] = sqrt(diagk)*b[k]; 1882 nz = ai[k+1] - ai[k] - 1; 1883 while (nz--) x[k] += (*v++) * x[*vj++]; 1884 } 1885 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1886 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1887 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1888 PetscFunctionReturn(0); 1889 } 1890 1891 extern PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat,PetscTruth); 1892 1893 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 1896 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat B,Mat A,IS perm,const MatFactorInfo *info) 1897 { 1898 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1899 PetscErrorCode ierr; 1900 const PetscInt *rip,mbs = a->mbs,*ai = a->i,*aj = a->j; 1901 PetscInt *jutmp,bs = A->rmap->bs,bs2=a->bs2,i; 1902 PetscInt m,reallocs = 0,*levtmp; 1903 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 1904 PetscInt incrlev,*lev,shift,prow,nz; 1905 PetscReal f = info->fill,levels = info->levels; 1906 PetscTruth perm_identity; 1907 1908 PetscFunctionBegin; 1909 /* check whether perm is the identity mapping */ 1910 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1911 1912 if (perm_identity){ 1913 a->permute = PETSC_FALSE; 1914 ai = a->i; aj = a->j; 1915 } else { /* non-trivial permutation */ 1916 a->permute = PETSC_TRUE; 1917 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1918 ai = a->inew; aj = a->jnew; 1919 } 1920 1921 /* initialization */ 1922 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1923 umax = (PetscInt)(f*ai[mbs] + 1); 1924 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 1925 umax += mbs + 1; 1926 shift = mbs + 1; 1927 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 1928 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 1929 iu[0] = mbs + 1; 1930 juidx = mbs + 1; 1931 /* prowl: linked list for pivot row */ 1932 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr); 1933 /* q: linked list for col index */ 1934 q = prowl + mbs; 1935 levtmp = q + mbs; 1936 1937 for (i=0; i<mbs; i++){ 1938 prowl[i] = mbs; 1939 q[i] = 0; 1940 } 1941 1942 /* for each row k */ 1943 for (k=0; k<mbs; k++){ 1944 nzk = 0; 1945 q[k] = mbs; 1946 /* copy current row into linked list */ 1947 nz = ai[rip[k]+1] - ai[rip[k]]; 1948 j = ai[rip[k]]; 1949 while (nz--){ 1950 vj = rip[aj[j++]]; 1951 if (vj > k){ 1952 qm = k; 1953 do { 1954 m = qm; qm = q[m]; 1955 } while(qm < vj); 1956 if (qm == vj) { 1957 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 1958 } 1959 nzk++; 1960 q[m] = vj; 1961 q[vj] = qm; 1962 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1963 } 1964 } 1965 1966 /* modify nonzero structure of k-th row by computing fill-in 1967 for each row prow to be merged in */ 1968 prow = k; 1969 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1970 1971 while (prow < k){ 1972 /* merge row prow into k-th row */ 1973 jmin = iu[prow] + 1; 1974 jmax = iu[prow+1]; 1975 qm = k; 1976 for (j=jmin; j<jmax; j++){ 1977 incrlev = lev[j-shift] + 1; 1978 if (incrlev > levels) continue; 1979 vj = ju[j]; 1980 do { 1981 m = qm; qm = q[m]; 1982 } while (qm < vj); 1983 if (qm != vj){ /* a fill */ 1984 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1985 levtmp[vj] = incrlev; 1986 } else { 1987 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1988 } 1989 } 1990 prow = prowl[prow]; /* next pivot row */ 1991 } 1992 1993 /* add k to row list for first nonzero element in k-th row */ 1994 if (nzk > 1){ 1995 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1996 prowl[k] = prowl[i]; prowl[i] = k; 1997 } 1998 iu[k+1] = iu[k] + nzk; 1999 2000 /* allocate more space to ju and lev if needed */ 2001 if (iu[k+1] > umax) { 2002 /* estimate how much additional space we will need */ 2003 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 2004 /* just double the memory each time */ 2005 maxadd = umax; 2006 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 2007 umax += maxadd; 2008 2009 /* allocate a longer ju */ 2010 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2011 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 2012 ierr = PetscFree(ju);CHKERRQ(ierr); 2013 ju = jutmp; 2014 2015 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2016 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 2017 ierr = PetscFree(lev);CHKERRQ(ierr); 2018 lev = jutmp; 2019 reallocs += 2; /* count how many times we realloc */ 2020 } 2021 2022 /* save nonzero structure of k-th row in ju */ 2023 i=k; 2024 while (nzk --) { 2025 i = q[i]; 2026 ju[juidx] = i; 2027 lev[juidx-shift] = levtmp[i]; 2028 juidx++; 2029 } 2030 } 2031 2032 #if defined(PETSC_USE_INFO) 2033 if (ai[mbs] != 0) { 2034 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2035 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2036 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2037 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 2038 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2039 } else { 2040 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2041 } 2042 #endif 2043 2044 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2045 ierr = PetscFree(prowl);CHKERRQ(ierr); 2046 ierr = PetscFree(lev);CHKERRQ(ierr); 2047 2048 /* put together the new matrix */ 2049 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,PETSC_NULL);CHKERRQ(ierr); 2050 2051 /* ierr = PetscLogObjectParent(B,iperm);CHKERRQ(ierr); */ 2052 b = (Mat_SeqSBAIJ*)(B)->data; 2053 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2054 b->singlemalloc = PETSC_FALSE; 2055 b->free_a = PETSC_TRUE; 2056 b->free_ij = PETSC_TRUE; 2057 /* the next line frees the default space generated by the Create() */ 2058 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 2059 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 2060 b->j = ju; 2061 b->i = iu; 2062 b->diag = 0; 2063 b->ilen = 0; 2064 b->imax = 0; 2065 2066 if (b->row) { 2067 ierr = ISDestroy(b->row);CHKERRQ(ierr); 2068 } 2069 if (b->icol) { 2070 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 2071 } 2072 b->row = perm; 2073 b->icol = perm; 2074 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2075 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2076 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2077 /* In b structure: Free imax, ilen, old a, old j. 2078 Allocate idnew, solve_work, new a, new j */ 2079 ierr = PetscLogObjectMemory(B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2080 b->maxnz = b->nz = iu[mbs]; 2081 2082 (B)->info.factor_mallocs = reallocs; 2083 (B)->info.fill_ratio_given = f; 2084 if (ai[mbs] != 0) { 2085 (B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2086 } else { 2087 (B)->info.fill_ratio_needed = 0.0; 2088 } 2089 ierr = MatSeqSBAIJSetNumericFactorization(B,perm_identity);CHKERRQ(ierr); 2090 PetscFunctionReturn(0); 2091 } 2092 2093 #include "petscbt.h" 2094 #include "../src/mat/utils/freespace.h" 2095 #undef __FUNCT__ 2096 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 2097 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2098 { 2099 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2100 Mat_SeqSBAIJ *b; 2101 PetscErrorCode ierr; 2102 PetscTruth perm_identity,free_ij = PETSC_TRUE,missing; 2103 PetscInt bs=A->rmap->bs,am=a->mbs,d; 2104 const PetscInt *cols,*rip,*ai,*aj; 2105 PetscInt reallocs=0,i,*ui; 2106 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2107 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 2108 PetscReal fill=info->fill,levels=info->levels,ratio_needed; 2109 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2110 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2111 PetscBT lnkbt; 2112 2113 PetscFunctionBegin; 2114 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2115 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2116 2117 /* 2118 This code originally uses Modified Sparse Row (MSR) storage 2119 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 2120 Then it is rewritten so the factor B takes seqsbaij format. However the associated 2121 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 2122 thus the original code in MSR format is still used for these cases. 2123 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 2124 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 2125 */ 2126 if (bs > 1){ 2127 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr); 2128 PetscFunctionReturn(0); 2129 } 2130 2131 /* check whether perm is the identity mapping */ 2132 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2133 2134 /* special case that simply copies fill pattern */ 2135 if (!levels && perm_identity) { 2136 a->permute = PETSC_FALSE; 2137 /* reuse the column pointers and row offsets for memory savings */ 2138 ui = a->i; 2139 uj = a->j; 2140 free_ij = PETSC_FALSE; 2141 ratio_needed = 1.0; 2142 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2143 if (perm_identity){ 2144 a->permute = PETSC_FALSE; 2145 ai = a->i; aj = a->j; 2146 } else { /* non-trivial permutation */ 2147 a->permute = PETSC_TRUE; 2148 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2149 ai = a->inew; aj = a->jnew; 2150 } 2151 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2152 2153 /* initialization */ 2154 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2155 ui[0] = 0; 2156 2157 /* jl: linked list for storing indices of the pivot rows 2158 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2159 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 2160 il = jl + am; 2161 uj_ptr = (PetscInt**)(il + am); 2162 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 2163 for (i=0; i<am; i++){ 2164 jl[i] = am; il[i] = 0; 2165 } 2166 2167 /* create and initialize a linked list for storing column indices of the active row k */ 2168 nlnk = am + 1; 2169 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2170 2171 /* initial FreeSpace size is fill*(ai[am]+1) */ 2172 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2173 current_space = free_space; 2174 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2175 current_space_lvl = free_space_lvl; 2176 2177 for (k=0; k<am; k++){ /* for each active row k */ 2178 /* initialize lnk by the column indices of row rip[k] */ 2179 nzk = 0; 2180 ncols = ai[rip[k]+1] - ai[rip[k]]; 2181 cols = aj+ai[rip[k]]; 2182 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2183 nzk += nlnk; 2184 2185 /* update lnk by computing fill-in for each pivot row to be merged in */ 2186 prow = jl[k]; /* 1st pivot row */ 2187 2188 while (prow < k){ 2189 nextprow = jl[prow]; 2190 2191 /* merge prow into k-th row */ 2192 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2193 jmax = ui[prow+1]; 2194 ncols = jmax-jmin; 2195 i = jmin - ui[prow]; 2196 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2197 j = *(uj_lvl_ptr[prow] + i - 1); 2198 cols_lvl = uj_lvl_ptr[prow]+i; 2199 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2200 nzk += nlnk; 2201 2202 /* update il and jl for prow */ 2203 if (jmin < jmax){ 2204 il[prow] = jmin; 2205 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2206 } 2207 prow = nextprow; 2208 } 2209 2210 /* if free space is not available, make more free space */ 2211 if (current_space->local_remaining<nzk) { 2212 i = am - k + 1; /* num of unfactored rows */ 2213 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2214 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2215 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2216 reallocs++; 2217 } 2218 2219 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2220 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2221 2222 /* add the k-th row into il and jl */ 2223 if (nzk-1 > 0){ 2224 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2225 jl[k] = jl[i]; jl[i] = k; 2226 il[k] = ui[k] + 1; 2227 } 2228 uj_ptr[k] = current_space->array; 2229 uj_lvl_ptr[k] = current_space_lvl->array; 2230 2231 current_space->array += nzk; 2232 current_space->local_used += nzk; 2233 current_space->local_remaining -= nzk; 2234 current_space_lvl->array += nzk; 2235 current_space_lvl->local_used += nzk; 2236 current_space_lvl->local_remaining -= nzk; 2237 2238 ui[k+1] = ui[k] + nzk; 2239 } 2240 2241 #if defined(PETSC_USE_INFO) 2242 if (ai[am] != 0) { 2243 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2244 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2245 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2246 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2247 } else { 2248 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2249 } 2250 #endif 2251 2252 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2253 ierr = PetscFree(jl);CHKERRQ(ierr); 2254 2255 /* destroy list of free space and other temporary array(s) */ 2256 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2257 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2258 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2259 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2260 if (ai[am] != 0) { 2261 ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2262 } else { 2263 ratio_needed = 0.0; 2264 } 2265 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2266 2267 /* put together the new matrix in MATSEQSBAIJ format */ 2268 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2269 2270 b = (Mat_SeqSBAIJ*)(fact)->data; 2271 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2272 b->singlemalloc = PETSC_FALSE; 2273 b->free_a = PETSC_TRUE; 2274 b->free_ij = free_ij; 2275 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2276 b->j = uj; 2277 b->i = ui; 2278 b->diag = 0; 2279 b->ilen = 0; 2280 b->imax = 0; 2281 b->row = perm; 2282 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2283 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2284 b->icol = perm; 2285 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2286 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2287 b->maxnz = b->nz = ui[am]; 2288 2289 (fact)->info.factor_mallocs = reallocs; 2290 (fact)->info.fill_ratio_given = fill; 2291 (fact)->info.fill_ratio_needed = ratio_needed; 2292 ierr = MatSeqSBAIJSetNumericFactorization(fact,perm_identity);CHKERRQ(ierr); 2293 PetscFunctionReturn(0); 2294 } 2295 2296