1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for SBAIJ format. 5 */ 6 7 #include "src/mat/impls/sbaij/seq/sbaij.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/inline/ilu.h" 10 #include "src/inline/dot.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 14 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 15 { 16 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 17 IS isrow=a->row; 18 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 19 PetscErrorCode ierr; 20 PetscInt nz,*vj,k,*r,idx,k1; 21 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 22 MatScalar *aa=a->a,*v,*diag; 23 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 24 25 PetscFunctionBegin; 26 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28 t = a->solve_work; 29 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 30 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 31 32 /* solve U^T * D * y = b by forward substitution */ 33 xk = t; 34 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 35 idx = bs*r[k]; 36 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 37 } 38 for (k=0; k<mbs; k++){ 39 v = aa + bs2*ai[k]; 40 xk = t + k*bs; /* Dk*xk = k-th block of x */ 41 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 42 nz = ai[k+1] - ai[k]; 43 vj = aj + ai[k]; 44 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 45 while (nz--) { 46 /* x(:) += U(k,:)^T*(Dk*xk) */ 47 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 48 vj++; xj = t + (*vj)*bs; 49 v += bs2; 50 } 51 /* xk = inv(Dk)*(Dk*xk) */ 52 diag = aa+k*bs2; /* ptr to inv(Dk) */ 53 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 54 } 55 56 /* solve U*x = y by back substitution */ 57 for (k=mbs-1; k>=0; k--){ 58 v = aa + bs2*ai[k]; 59 xk = t + k*bs; /* xk */ 60 nz = ai[k+1] - ai[k]; 61 vj = aj + ai[k]; 62 xj = t + (*vj)*bs; 63 while (nz--) { 64 /* xk += U(k,:)*x(:) */ 65 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 66 vj++; 67 v += bs2; xj = t + (*vj)*bs; 68 } 69 idx = bs*r[k]; 70 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 71 } 72 73 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 74 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 75 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 76 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 77 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N" 83 PetscErrorCode MatForwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 84 { 85 PetscFunctionBegin; 86 SETERRQ(1,"not implemented yet"); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N" 92 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 93 { 94 PetscFunctionBegin; 95 SETERRQ(1,"not implemented yet"); 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 101 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 102 { 103 PetscErrorCode ierr; 104 PetscInt nz,*vj,k; 105 PetscInt bs2 = bs*bs; 106 MatScalar *v,*diag; 107 PetscScalar *xk,*xj,*xk_tmp; 108 109 PetscFunctionBegin; 110 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 111 for (k=0; k<mbs; k++){ 112 v = aa + bs2*ai[k]; 113 xk = x + k*bs; /* Dk*xk = k-th block of x */ 114 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 115 nz = ai[k+1] - ai[k]; 116 vj = aj + ai[k]; 117 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 118 while (nz--) { 119 /* x(:) += U(k,:)^T*(Dk*xk) */ 120 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 121 vj++; xj = x + (*vj)*bs; 122 v += bs2; 123 } 124 /* xk = inv(Dk)*(Dk*xk) */ 125 diag = aa+k*bs2; /* ptr to inv(Dk) */ 126 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 127 } 128 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 134 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 135 { 136 PetscInt nz,*vj,k; 137 PetscInt bs2 = bs*bs; 138 MatScalar *v; 139 PetscScalar *xk,*xj; 140 141 PetscFunctionBegin; 142 for (k=mbs-1; k>=0; k--){ 143 v = aa + bs2*ai[k]; 144 xk = x + k*bs; /* xk */ 145 nz = ai[k+1] - ai[k]; 146 vj = aj + ai[k]; 147 xj = x + (*vj)*bs; 148 while (nz--) { 149 /* xk += U(k,:)*x(:) */ 150 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 151 vj++; 152 v += bs2; xj = x + (*vj)*bs; 153 } 154 } 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 160 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 161 { 162 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 163 PetscErrorCode ierr; 164 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 165 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 166 MatScalar *aa=a->a; 167 PetscScalar *x,*b; 168 169 PetscFunctionBegin; 170 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 171 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 172 173 /* solve U^T * D * y = b by forward substitution */ 174 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 175 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 176 177 /* solve U*x = y by back substitution */ 178 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 179 180 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 181 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 182 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering" 188 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 189 { 190 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 191 PetscErrorCode ierr; 192 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 193 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 194 MatScalar *aa=a->a; 195 PetscScalar *x,*b; 196 197 PetscFunctionBegin; 198 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 199 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 200 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 201 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 202 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 203 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 204 ierr = PetscLogFlops(bs2*a->nz + A->rmap.N);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering" 210 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 211 { 212 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 213 PetscErrorCode ierr; 214 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 215 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 216 MatScalar *aa=a->a; 217 PetscScalar *x,*b; 218 219 PetscFunctionBegin; 220 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 221 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 222 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 223 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 224 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 225 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 226 ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr); 227 PetscFunctionReturn(0); 228 } 229 230 #undef __FUNCT__ 231 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 232 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 233 { 234 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 235 IS isrow=a->row; 236 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 237 PetscErrorCode ierr; 238 PetscInt nz,*vj,k,*r,idx; 239 MatScalar *aa=a->a,*v,*d; 240 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 241 242 PetscFunctionBegin; 243 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 244 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 245 t = a->solve_work; 246 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 247 248 /* solve U^T * D * y = b by forward substitution */ 249 tp = t; 250 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 251 idx = 7*r[k]; 252 tp[0] = b[idx]; 253 tp[1] = b[idx+1]; 254 tp[2] = b[idx+2]; 255 tp[3] = b[idx+3]; 256 tp[4] = b[idx+4]; 257 tp[5] = b[idx+5]; 258 tp[6] = b[idx+6]; 259 tp += 7; 260 } 261 262 for (k=0; k<mbs; k++){ 263 v = aa + 49*ai[k]; 264 vj = aj + ai[k]; 265 tp = t + k*7; 266 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 267 nz = ai[k+1] - ai[k]; 268 tp = t + (*vj)*7; 269 while (nz--) { 270 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 271 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 272 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 273 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 274 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 275 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 276 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 277 vj++; tp = t + (*vj)*7; 278 v += 49; 279 } 280 281 /* xk = inv(Dk)*(Dk*xk) */ 282 d = aa+k*49; /* ptr to inv(Dk) */ 283 tp = t + k*7; 284 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 285 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 286 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 287 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 288 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 289 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 290 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 291 } 292 293 /* solve U*x = y by back substitution */ 294 for (k=mbs-1; k>=0; k--){ 295 v = aa + 49*ai[k]; 296 vj = aj + ai[k]; 297 tp = t + k*7; 298 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 299 nz = ai[k+1] - ai[k]; 300 301 tp = t + (*vj)*7; 302 while (nz--) { 303 /* xk += U(k,:)*x(:) */ 304 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 305 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 306 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 307 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 308 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 309 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 310 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 311 vj++; tp = t + (*vj)*7; 312 v += 49; 313 } 314 tp = t + k*7; 315 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 316 idx = 7*r[k]; 317 x[idx] = x0; 318 x[idx+1] = x1; 319 x[idx+2] = x2; 320 x[idx+3] = x3; 321 x[idx+4] = x4; 322 x[idx+5] = x5; 323 x[idx+6] = x6; 324 } 325 326 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 327 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 328 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 329 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 330 PetscFunctionReturn(0); 331 } 332 333 #undef __FUNCT__ 334 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 335 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 336 { 337 MatScalar *v,*d; 338 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 339 PetscInt nz,*vj,k; 340 341 PetscFunctionBegin; 342 for (k=0; k<mbs; k++){ 343 v = aa + 49*ai[k]; 344 xp = x + k*7; 345 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 346 nz = ai[k+1] - ai[k]; 347 vj = aj + ai[k]; 348 xp = x + (*vj)*7; 349 while (nz--) { 350 /* x(:) += U(k,:)^T*(Dk*xk) */ 351 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 352 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 353 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 354 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 355 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 356 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 357 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 358 vj++; xp = x + (*vj)*7; 359 v += 49; 360 } 361 /* xk = inv(Dk)*(Dk*xk) */ 362 d = aa+k*49; /* ptr to inv(Dk) */ 363 xp = x + k*7; 364 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 365 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 366 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 367 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 368 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 369 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 370 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 371 } 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 377 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 378 { 379 MatScalar *v; 380 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 381 PetscInt nz,*vj,k; 382 383 PetscFunctionBegin; 384 for (k=mbs-1; k>=0; k--){ 385 v = aa + 49*ai[k]; 386 xp = x + k*7; 387 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 388 nz = ai[k+1] - ai[k]; 389 vj = aj + ai[k]; 390 xp = x + (*vj)*7; 391 while (nz--) { 392 /* xk += U(k,:)*x(:) */ 393 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 394 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 395 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 396 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 397 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 398 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 399 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 400 vj++; 401 v += 49; xp = x + (*vj)*7; 402 } 403 xp = x + k*7; 404 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 405 } 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 411 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 412 { 413 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 414 PetscErrorCode ierr; 415 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 416 MatScalar *aa=a->a; 417 PetscScalar *x,*b; 418 419 PetscFunctionBegin; 420 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 421 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 422 423 /* solve U^T * D * y = b by forward substitution */ 424 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 425 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 426 427 /* solve U*x = y by back substitution */ 428 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 429 430 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 431 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 432 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNCT__ 437 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering" 438 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 439 { 440 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 441 PetscErrorCode ierr; 442 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 443 MatScalar *aa=a->a; 444 PetscScalar *x,*b; 445 446 PetscFunctionBegin; 447 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 448 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 449 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 450 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 451 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 452 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 453 ierr = PetscLogFlops(49*a->nz + mbs);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering" 459 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 460 { 461 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 462 PetscErrorCode ierr; 463 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 464 MatScalar *aa=a->a; 465 PetscScalar *x,*b; 466 467 PetscFunctionBegin; 468 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 469 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 470 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 471 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 472 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 473 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 474 ierr = PetscLogFlops(49*a->nz);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 480 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 481 { 482 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 483 IS isrow=a->row; 484 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 485 PetscErrorCode ierr; 486 PetscInt nz,*vj,k,*r,idx; 487 MatScalar *aa=a->a,*v,*d; 488 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 489 490 PetscFunctionBegin; 491 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 492 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 493 t = a->solve_work; 494 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 495 496 /* solve U^T * D * y = b by forward substitution */ 497 tp = t; 498 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 499 idx = 6*r[k]; 500 tp[0] = b[idx]; 501 tp[1] = b[idx+1]; 502 tp[2] = b[idx+2]; 503 tp[3] = b[idx+3]; 504 tp[4] = b[idx+4]; 505 tp[5] = b[idx+5]; 506 tp += 6; 507 } 508 509 for (k=0; k<mbs; k++){ 510 v = aa + 36*ai[k]; 511 vj = aj + ai[k]; 512 tp = t + k*6; 513 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 514 nz = ai[k+1] - ai[k]; 515 tp = t + (*vj)*6; 516 while (nz--) { 517 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 518 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 519 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 520 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 521 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 522 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 523 vj++; tp = t + (*vj)*6; 524 v += 36; 525 } 526 527 /* xk = inv(Dk)*(Dk*xk) */ 528 d = aa+k*36; /* ptr to inv(Dk) */ 529 tp = t + k*6; 530 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 531 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 532 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 533 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 534 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 535 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 536 } 537 538 /* solve U*x = y by back substitution */ 539 for (k=mbs-1; k>=0; k--){ 540 v = aa + 36*ai[k]; 541 vj = aj + ai[k]; 542 tp = t + k*6; 543 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 544 nz = ai[k+1] - ai[k]; 545 546 tp = t + (*vj)*6; 547 while (nz--) { 548 /* xk += U(k,:)*x(:) */ 549 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 550 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 551 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 552 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 553 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 554 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 555 vj++; tp = t + (*vj)*6; 556 v += 36; 557 } 558 tp = t + k*6; 559 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 560 idx = 6*r[k]; 561 x[idx] = x0; 562 x[idx+1] = x1; 563 x[idx+2] = x2; 564 x[idx+3] = x3; 565 x[idx+4] = x4; 566 x[idx+5] = x5; 567 } 568 569 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 570 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 571 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 572 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 578 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 579 { 580 MatScalar *v,*d; 581 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 582 PetscInt nz,*vj,k; 583 584 PetscFunctionBegin; 585 for (k=0; k<mbs; k++){ 586 v = aa + 36*ai[k]; 587 xp = x + k*6; 588 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 589 nz = ai[k+1] - ai[k]; 590 vj = aj + ai[k]; 591 xp = x + (*vj)*6; 592 while (nz--) { 593 /* x(:) += U(k,:)^T*(Dk*xk) */ 594 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 595 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 596 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 597 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 598 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 599 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 600 vj++; xp = x + (*vj)*6; 601 v += 36; 602 } 603 /* xk = inv(Dk)*(Dk*xk) */ 604 d = aa+k*36; /* ptr to inv(Dk) */ 605 xp = x + k*6; 606 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 607 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 608 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 609 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 610 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 611 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 612 } 613 PetscFunctionReturn(0); 614 } 615 #undef __FUNCT__ 616 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 617 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 618 { 619 MatScalar *v; 620 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 621 PetscInt nz,*vj,k; 622 623 PetscFunctionBegin; 624 for (k=mbs-1; k>=0; k--){ 625 v = aa + 36*ai[k]; 626 xp = x + k*6; 627 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 628 nz = ai[k+1] - ai[k]; 629 vj = aj + ai[k]; 630 xp = x + (*vj)*6; 631 while (nz--) { 632 /* xk += U(k,:)*x(:) */ 633 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 634 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 635 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 636 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 637 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 638 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 639 vj++; 640 v += 36; xp = x + (*vj)*6; 641 } 642 xp = x + k*6; 643 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 644 } 645 PetscFunctionReturn(0); 646 } 647 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 651 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 652 { 653 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 654 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 655 MatScalar *aa=a->a; 656 PetscScalar *x,*b; 657 PetscErrorCode ierr; 658 659 PetscFunctionBegin; 660 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 661 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 662 663 /* solve U^T * D * y = b by forward substitution */ 664 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 665 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 666 667 /* solve U*x = y by back substitution */ 668 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 669 670 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 671 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 672 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering" 678 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 679 { 680 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 681 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 682 MatScalar *aa=a->a; 683 PetscScalar *x,*b; 684 PetscErrorCode ierr; 685 686 PetscFunctionBegin; 687 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 688 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 689 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 690 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 691 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 692 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 693 ierr = PetscLogFlops(36*a->nz + mbs);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering" 699 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 700 { 701 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 702 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 703 MatScalar *aa=a->a; 704 PetscScalar *x,*b; 705 PetscErrorCode ierr; 706 707 PetscFunctionBegin; 708 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 709 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 710 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 711 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 712 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 713 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 714 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 715 PetscFunctionReturn(0); 716 } 717 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 720 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 721 { 722 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 723 IS isrow=a->row; 724 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 725 PetscErrorCode ierr; 726 PetscInt nz,*vj,k,*r,idx; 727 MatScalar *aa=a->a,*v,*diag; 728 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 729 730 PetscFunctionBegin; 731 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 732 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 733 t = a->solve_work; 734 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 735 736 /* solve U^T * D * y = b by forward substitution */ 737 tp = t; 738 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 739 idx = 5*r[k]; 740 tp[0] = b[idx]; 741 tp[1] = b[idx+1]; 742 tp[2] = b[idx+2]; 743 tp[3] = b[idx+3]; 744 tp[4] = b[idx+4]; 745 tp += 5; 746 } 747 748 for (k=0; k<mbs; k++){ 749 v = aa + 25*ai[k]; 750 vj = aj + ai[k]; 751 tp = t + k*5; 752 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 753 nz = ai[k+1] - ai[k]; 754 755 tp = t + (*vj)*5; 756 while (nz--) { 757 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 758 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 759 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 760 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 761 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 762 vj++; tp = t + (*vj)*5; 763 v += 25; 764 } 765 766 /* xk = inv(Dk)*(Dk*xk) */ 767 diag = aa+k*25; /* ptr to inv(Dk) */ 768 tp = t + k*5; 769 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 770 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 771 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 772 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 773 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 774 } 775 776 /* solve U*x = y by back substitution */ 777 for (k=mbs-1; k>=0; k--){ 778 v = aa + 25*ai[k]; 779 vj = aj + ai[k]; 780 tp = t + k*5; 781 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 782 nz = ai[k+1] - ai[k]; 783 784 tp = t + (*vj)*5; 785 while (nz--) { 786 /* xk += U(k,:)*x(:) */ 787 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 788 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 789 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 790 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 791 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 792 vj++; tp = t + (*vj)*5; 793 v += 25; 794 } 795 tp = t + k*5; 796 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 797 idx = 5*r[k]; 798 x[idx] = x0; 799 x[idx+1] = x1; 800 x[idx+2] = x2; 801 x[idx+3] = x3; 802 x[idx+4] = x4; 803 } 804 805 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 806 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 807 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 808 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 814 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 815 { 816 MatScalar *v,*diag; 817 PetscScalar *xp,x0,x1,x2,x3,x4; 818 PetscInt nz,*vj,k; 819 820 PetscFunctionBegin; 821 for (k=0; k<mbs; k++){ 822 v = aa + 25*ai[k]; 823 xp = x + k*5; 824 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 825 nz = ai[k+1] - ai[k]; 826 vj = aj + ai[k]; 827 xp = x + (*vj)*5; 828 while (nz--) { 829 /* x(:) += U(k,:)^T*(Dk*xk) */ 830 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 831 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 832 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 833 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 834 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 835 vj++; xp = x + (*vj)*5; 836 v += 25; 837 } 838 /* xk = inv(Dk)*(Dk*xk) */ 839 diag = aa+k*25; /* ptr to inv(Dk) */ 840 xp = x + k*5; 841 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 842 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 843 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 844 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 845 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 846 } 847 PetscFunctionReturn(0); 848 } 849 850 #undef __FUNCT__ 851 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 852 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 853 { 854 MatScalar *v; 855 PetscScalar *xp,x0,x1,x2,x3,x4; 856 PetscInt nz,*vj,k; 857 858 PetscFunctionBegin; 859 for (k=mbs-1; k>=0; k--){ 860 v = aa + 25*ai[k]; 861 xp = x + k*5; 862 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 863 nz = ai[k+1] - ai[k]; 864 vj = aj + ai[k]; 865 xp = x + (*vj)*5; 866 while (nz--) { 867 /* xk += U(k,:)*x(:) */ 868 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 869 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 870 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 871 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 872 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 873 vj++; 874 v += 25; xp = x + (*vj)*5; 875 } 876 xp = x + k*5; 877 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 878 } 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 884 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 885 { 886 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 887 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 888 MatScalar *aa=a->a; 889 PetscScalar *x,*b; 890 PetscErrorCode ierr; 891 892 PetscFunctionBegin; 893 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 894 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 895 896 /* solve U^T * D * y = b by forward substitution */ 897 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 898 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 899 900 /* solve U*x = y by back substitution */ 901 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 902 903 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 904 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 905 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 906 PetscFunctionReturn(0); 907 } 908 909 #undef __FUNCT__ 910 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering" 911 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 912 { 913 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 914 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 915 MatScalar *aa=a->a; 916 PetscScalar *x,*b; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 921 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 922 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 923 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 924 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 925 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 926 ierr = PetscLogFlops(25*(a->nz + mbs));CHKERRQ(ierr); 927 PetscFunctionReturn(0); 928 } 929 930 #undef __FUNCT__ 931 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering" 932 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 933 { 934 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 935 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 936 MatScalar *aa=a->a; 937 PetscScalar *x,*b; 938 PetscErrorCode ierr; 939 940 PetscFunctionBegin; 941 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 942 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 943 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 944 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 945 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 946 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 947 ierr = PetscLogFlops(25*a->nz);CHKERRQ(ierr); 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 953 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 954 { 955 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 956 IS isrow=a->row; 957 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 958 PetscErrorCode ierr; 959 PetscInt nz,*vj,k,*r,idx; 960 MatScalar *aa=a->a,*v,*diag; 961 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 962 963 PetscFunctionBegin; 964 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 965 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 966 t = a->solve_work; 967 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 968 969 /* solve U^T * D * y = b by forward substitution */ 970 tp = t; 971 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 972 idx = 4*r[k]; 973 tp[0] = b[idx]; 974 tp[1] = b[idx+1]; 975 tp[2] = b[idx+2]; 976 tp[3] = b[idx+3]; 977 tp += 4; 978 } 979 980 for (k=0; k<mbs; k++){ 981 v = aa + 16*ai[k]; 982 vj = aj + ai[k]; 983 tp = t + k*4; 984 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 985 nz = ai[k+1] - ai[k]; 986 987 tp = t + (*vj)*4; 988 while (nz--) { 989 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 990 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 991 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 992 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 993 vj++; tp = t + (*vj)*4; 994 v += 16; 995 } 996 997 /* xk = inv(Dk)*(Dk*xk) */ 998 diag = aa+k*16; /* ptr to inv(Dk) */ 999 tp = t + k*4; 1000 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1001 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1002 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1003 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1004 } 1005 1006 /* solve U*x = y by back substitution */ 1007 for (k=mbs-1; k>=0; k--){ 1008 v = aa + 16*ai[k]; 1009 vj = aj + ai[k]; 1010 tp = t + k*4; 1011 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 1012 nz = ai[k+1] - ai[k]; 1013 1014 tp = t + (*vj)*4; 1015 while (nz--) { 1016 /* xk += U(k,:)*x(:) */ 1017 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 1018 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 1019 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 1020 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 1021 vj++; tp = t + (*vj)*4; 1022 v += 16; 1023 } 1024 tp = t + k*4; 1025 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 1026 idx = 4*r[k]; 1027 x[idx] = x0; 1028 x[idx+1] = x1; 1029 x[idx+2] = x2; 1030 x[idx+3] = x3; 1031 } 1032 1033 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1034 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1035 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1036 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 1037 PetscFunctionReturn(0); 1038 } 1039 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1042 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1043 { 1044 MatScalar *v,*diag; 1045 PetscScalar *xp,x0,x1,x2,x3; 1046 PetscInt nz,*vj,k; 1047 1048 PetscFunctionBegin; 1049 for (k=0; k<mbs; k++){ 1050 v = aa + 16*ai[k]; 1051 xp = x + k*4; 1052 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 1053 nz = ai[k+1] - ai[k]; 1054 vj = aj + ai[k]; 1055 xp = x + (*vj)*4; 1056 while (nz--) { 1057 /* x(:) += U(k,:)^T*(Dk*xk) */ 1058 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1059 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1060 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1061 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1062 vj++; xp = x + (*vj)*4; 1063 v += 16; 1064 } 1065 /* xk = inv(Dk)*(Dk*xk) */ 1066 diag = aa+k*16; /* ptr to inv(Dk) */ 1067 xp = x + k*4; 1068 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1069 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1070 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1071 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1072 } 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1078 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1079 { 1080 MatScalar *v; 1081 PetscScalar *xp,x0,x1,x2,x3; 1082 PetscInt nz,*vj,k; 1083 1084 PetscFunctionBegin; 1085 for (k=mbs-1; k>=0; k--){ 1086 v = aa + 16*ai[k]; 1087 xp = x + k*4; 1088 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 1089 nz = ai[k+1] - ai[k]; 1090 vj = aj + ai[k]; 1091 xp = x + (*vj)*4; 1092 while (nz--) { 1093 /* xk += U(k,:)*x(:) */ 1094 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 1095 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 1096 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 1097 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 1098 vj++; 1099 v += 16; xp = x + (*vj)*4; 1100 } 1101 xp = x + k*4; 1102 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 1103 } 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 1109 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1110 { 1111 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1112 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1113 MatScalar *aa=a->a; 1114 PetscScalar *x,*b; 1115 PetscErrorCode ierr; 1116 1117 PetscFunctionBegin; 1118 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1119 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1120 1121 /* solve U^T * D * y = b by forward substitution */ 1122 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1123 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1124 1125 /* solve U*x = y by back substitution */ 1126 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1127 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1128 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1129 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering" 1135 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1136 { 1137 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1138 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1139 MatScalar *aa=a->a; 1140 PetscScalar *x,*b; 1141 PetscErrorCode ierr; 1142 1143 PetscFunctionBegin; 1144 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1145 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1146 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1147 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1148 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1149 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1150 ierr = PetscLogFlops(16*a->nz + mbs);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering" 1156 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1157 { 1158 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1159 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1160 MatScalar *aa=a->a; 1161 PetscScalar *x,*b; 1162 PetscErrorCode ierr; 1163 1164 PetscFunctionBegin; 1165 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1166 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1167 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1168 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1169 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1170 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1171 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 1177 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 1178 { 1179 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1180 IS isrow=a->row; 1181 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1182 PetscErrorCode ierr; 1183 PetscInt nz,*vj,k,*r,idx; 1184 MatScalar *aa=a->a,*v,*diag; 1185 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 1186 1187 PetscFunctionBegin; 1188 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1189 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1190 t = a->solve_work; 1191 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1192 1193 /* solve U^T * D * y = b by forward substitution */ 1194 tp = t; 1195 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1196 idx = 3*r[k]; 1197 tp[0] = b[idx]; 1198 tp[1] = b[idx+1]; 1199 tp[2] = b[idx+2]; 1200 tp += 3; 1201 } 1202 1203 for (k=0; k<mbs; k++){ 1204 v = aa + 9*ai[k]; 1205 vj = aj + ai[k]; 1206 tp = t + k*3; 1207 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 1208 nz = ai[k+1] - ai[k]; 1209 1210 tp = t + (*vj)*3; 1211 while (nz--) { 1212 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1213 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1214 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1215 vj++; tp = t + (*vj)*3; 1216 v += 9; 1217 } 1218 1219 /* xk = inv(Dk)*(Dk*xk) */ 1220 diag = aa+k*9; /* ptr to inv(Dk) */ 1221 tp = t + k*3; 1222 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1223 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1224 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1225 } 1226 1227 /* solve U*x = y by back substitution */ 1228 for (k=mbs-1; k>=0; k--){ 1229 v = aa + 9*ai[k]; 1230 vj = aj + ai[k]; 1231 tp = t + k*3; 1232 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 1233 nz = ai[k+1] - ai[k]; 1234 1235 tp = t + (*vj)*3; 1236 while (nz--) { 1237 /* xk += U(k,:)*x(:) */ 1238 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 1239 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 1240 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 1241 vj++; tp = t + (*vj)*3; 1242 v += 9; 1243 } 1244 tp = t + k*3; 1245 tp[0] = x0; tp[1] = x1; tp[2] = x2; 1246 idx = 3*r[k]; 1247 x[idx] = x0; 1248 x[idx+1] = x1; 1249 x[idx+2] = x2; 1250 } 1251 1252 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1253 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1254 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1255 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1261 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1262 { 1263 MatScalar *v,*diag; 1264 PetscScalar *xp,x0,x1,x2; 1265 PetscInt nz,*vj,k; 1266 1267 PetscFunctionBegin; 1268 for (k=0; k<mbs; k++){ 1269 v = aa + 9*ai[k]; 1270 xp = x + k*3; 1271 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1272 nz = ai[k+1] - ai[k]; 1273 vj = aj + ai[k]; 1274 xp = x + (*vj)*3; 1275 while (nz--) { 1276 /* x(:) += U(k,:)^T*(Dk*xk) */ 1277 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1278 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1279 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1280 vj++; xp = x + (*vj)*3; 1281 v += 9; 1282 } 1283 /* xk = inv(Dk)*(Dk*xk) */ 1284 diag = aa+k*9; /* ptr to inv(Dk) */ 1285 xp = x + k*3; 1286 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1287 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1288 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1289 } 1290 PetscFunctionReturn(0); 1291 } 1292 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1295 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1296 { 1297 MatScalar *v; 1298 PetscScalar *xp,x0,x1,x2; 1299 PetscInt nz,*vj,k; 1300 1301 PetscFunctionBegin; 1302 for (k=mbs-1; k>=0; k--){ 1303 v = aa + 9*ai[k]; 1304 xp = x + k*3; 1305 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1306 nz = ai[k+1] - ai[k]; 1307 vj = aj + ai[k]; 1308 xp = x + (*vj)*3; 1309 while (nz--) { 1310 /* xk += U(k,:)*x(:) */ 1311 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1312 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1313 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1314 vj++; 1315 v += 9; xp = x + (*vj)*3; 1316 } 1317 xp = x + k*3; 1318 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1319 } 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 1325 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1326 { 1327 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1328 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1329 MatScalar *aa=a->a; 1330 PetscScalar *x,*b; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1335 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1336 1337 /* solve U^T * D * y = b by forward substitution */ 1338 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1339 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1340 1341 /* solve U*x = y by back substitution */ 1342 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1343 1344 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1345 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1346 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering" 1352 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1353 { 1354 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1355 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1356 MatScalar *aa=a->a; 1357 PetscScalar *x,*b; 1358 PetscErrorCode ierr; 1359 1360 PetscFunctionBegin; 1361 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1362 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1363 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1364 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1365 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1366 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1367 ierr = PetscLogFlops(9*(a->nz + mbs));CHKERRQ(ierr); 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering" 1373 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1374 { 1375 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1376 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1377 MatScalar *aa=a->a; 1378 PetscScalar *x,*b; 1379 PetscErrorCode ierr; 1380 1381 PetscFunctionBegin; 1382 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1383 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1384 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1385 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1386 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1387 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1388 ierr = PetscLogFlops(9*a->nz);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 1394 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 1395 { 1396 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1397 IS isrow=a->row; 1398 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1399 PetscErrorCode ierr; 1400 PetscInt nz,*vj,k,k2,*r,idx; 1401 MatScalar *aa=a->a,*v,*diag; 1402 PetscScalar *x,*b,x0,x1,*t; 1403 1404 PetscFunctionBegin; 1405 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1406 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1407 t = a->solve_work; 1408 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1409 1410 /* solve U^T * D * y = perm(b) by forward substitution */ 1411 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1412 idx = 2*r[k]; 1413 t[k*2] = b[idx]; 1414 t[k*2+1] = b[idx+1]; 1415 } 1416 for (k=0; k<mbs; k++){ 1417 v = aa + 4*ai[k]; 1418 vj = aj + ai[k]; 1419 k2 = k*2; 1420 x0 = t[k2]; x1 = t[k2+1]; 1421 nz = ai[k+1] - ai[k]; 1422 while (nz--) { 1423 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1424 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1425 vj++; v += 4; 1426 } 1427 diag = aa+k*4; /* ptr to inv(Dk) */ 1428 t[k2] = diag[0]*x0 + diag[2]*x1; 1429 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1430 } 1431 1432 /* solve U*x = y by back substitution */ 1433 for (k=mbs-1; k>=0; k--){ 1434 v = aa + 4*ai[k]; 1435 vj = aj + ai[k]; 1436 k2 = k*2; 1437 x0 = t[k2]; x1 = t[k2+1]; 1438 nz = ai[k+1] - ai[k]; 1439 while (nz--) { 1440 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1441 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1442 vj++; v += 4; 1443 } 1444 t[k2] = x0; 1445 t[k2+1] = x1; 1446 idx = 2*r[k]; 1447 x[idx] = x0; 1448 x[idx+1] = x1; 1449 } 1450 1451 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1452 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1453 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1454 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1460 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1461 { 1462 MatScalar *v,*diag; 1463 PetscScalar x0,x1; 1464 PetscInt nz,*vj,k,k2; 1465 1466 PetscFunctionBegin; 1467 for (k=0; k<mbs; k++){ 1468 v = aa + 4*ai[k]; 1469 vj = aj + ai[k]; 1470 k2 = k*2; 1471 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1472 nz = ai[k+1] - ai[k]; 1473 1474 while (nz--) { 1475 /* x(:) += U(k,:)^T*(Dk*xk) */ 1476 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1477 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1478 vj++; v += 4; 1479 } 1480 /* xk = inv(Dk)*(Dk*xk) */ 1481 diag = aa+k*4; /* ptr to inv(Dk) */ 1482 x[k2] = diag[0]*x0 + diag[2]*x1; 1483 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1484 } 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1490 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1491 { 1492 MatScalar *v; 1493 PetscScalar x0,x1; 1494 PetscInt nz,*vj,k,k2; 1495 1496 PetscFunctionBegin; 1497 for (k=mbs-1; k>=0; k--){ 1498 v = aa + 4*ai[k]; 1499 vj = aj + ai[k]; 1500 k2 = k*2; 1501 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1502 nz = ai[k+1] - ai[k]; 1503 while (nz--) { 1504 /* xk += U(k,:)*x(:) */ 1505 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1506 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1507 vj++; v += 4; 1508 } 1509 x[k2] = x0; 1510 x[k2+1] = x1; 1511 } 1512 PetscFunctionReturn(0); 1513 } 1514 1515 #undef __FUNCT__ 1516 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1517 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1518 { 1519 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1520 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1521 MatScalar *aa=a->a; 1522 PetscScalar *x,*b; 1523 PetscErrorCode ierr; 1524 1525 PetscFunctionBegin; 1526 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1527 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1528 1529 /* solve U^T * D * y = b by forward substitution */ 1530 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1531 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1532 1533 /* solve U*x = y by back substitution */ 1534 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1535 1536 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1537 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1538 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */ 1539 PetscFunctionReturn(0); 1540 } 1541 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering" 1544 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1545 { 1546 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1547 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1548 MatScalar *aa=a->a; 1549 PetscScalar *x,*b; 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1554 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1555 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1556 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1557 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1558 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1559 ierr = PetscLogFlops(4*(a->nz + mbs));CHKERRQ(ierr); 1560 PetscFunctionReturn(0); 1561 } 1562 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering" 1565 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1566 { 1567 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1568 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1569 MatScalar *aa=a->a; 1570 PetscScalar *x,*b; 1571 PetscErrorCode ierr; 1572 1573 PetscFunctionBegin; 1574 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1575 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1576 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1577 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1578 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1579 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1580 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1586 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1587 { 1588 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1589 IS isrow=a->row; 1590 PetscErrorCode ierr; 1591 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1592 MatScalar *aa=a->a,*v; 1593 PetscScalar *x,*b,xk,*t; 1594 PetscInt nz,*vj,k; 1595 1596 PetscFunctionBegin; 1597 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1598 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1599 t = a->solve_work; 1600 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1601 1602 /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */ 1603 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1604 for (k=0; k<mbs; k++){ 1605 v = aa + ai[k] + 1; 1606 vj = aj + ai[k] + 1; 1607 xk = t[k]; 1608 nz = ai[k+1] - ai[k] - 1; 1609 while (nz--) t[*vj++] += (*v++) * xk; 1610 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1611 } 1612 1613 /* solve U*perm(x) = y by back substitution */ 1614 for (k=mbs-1; k>=0; k--){ 1615 v = aa + ai[k] + 1; 1616 vj = aj + ai[k] + 1; 1617 nz = ai[k+1] - ai[k] - 1; 1618 while (nz--) t[k] += (*v++) * t[*vj++]; 1619 x[rp[k]] = t[k]; 1620 } 1621 1622 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1623 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1624 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1625 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1" 1631 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1632 { 1633 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1634 IS isrow=a->row; 1635 PetscErrorCode ierr; 1636 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1637 MatScalar *aa=a->a,*v; 1638 PetscReal diagk; 1639 PetscScalar *x,*b,xk; 1640 PetscInt nz,*vj,k; 1641 1642 PetscFunctionBegin; 1643 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ 1644 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1645 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1646 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1647 1648 for (k=0; k<mbs; k++) x[k] = b[rp[k]]; 1649 for (k=0; k<mbs; k++){ 1650 v = aa + ai[k] + 1; 1651 vj = aj + ai[k] + 1; 1652 xk = x[k]; 1653 nz = ai[k+1] - ai[k] - 1; 1654 while (nz--) x[*vj++] += (*v++) * xk; 1655 1656 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1657 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1658 x[k] = xk*sqrt(diagk); 1659 } 1660 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1661 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1662 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1663 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1664 PetscFunctionReturn(0); 1665 } 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1" 1669 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1670 { 1671 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1672 IS isrow=a->row; 1673 PetscErrorCode ierr; 1674 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1675 MatScalar *aa=a->a,*v; 1676 PetscReal diagk; 1677 PetscScalar *x,*b,*t; 1678 PetscInt nz,*vj,k; 1679 1680 PetscFunctionBegin; 1681 /* solve D^(1/2)*U*perm(x) = b by back substitution */ 1682 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1683 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1684 t = a->solve_work; 1685 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1686 1687 for (k=mbs-1; k>=0; k--){ 1688 v = aa + ai[k] + 1; 1689 vj = aj + ai[k] + 1; 1690 diagk = PetscRealPart(aa[ai[k]]); 1691 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1692 t[k] = b[k] * sqrt(diagk); 1693 nz = ai[k+1] - ai[k] - 1; 1694 while (nz--) t[k] += (*v++) * t[*vj++]; 1695 x[rp[k]] = t[k]; 1696 } 1697 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1698 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1699 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1700 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1706 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1707 { 1708 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1709 PetscErrorCode ierr; 1710 1711 PetscFunctionBegin; 1712 if (A->rmap.bs == 1) { 1713 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1714 } else { 1715 IS isrow=a->row; 1716 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,i; 1717 MatScalar *aa=a->a,*v; 1718 PetscScalar *x,*b,*t; 1719 PetscInt nz,*vj,k,n; 1720 if (bb->n > a->solves_work_n) { 1721 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1722 ierr = PetscMalloc(bb->n*A->rmap.N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1723 a->solves_work_n = bb->n; 1724 } 1725 n = bb->n; 1726 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1727 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1728 t = a->solves_work; 1729 1730 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1731 1732 /* solve U^T*D*y = perm(b) by forward substitution */ 1733 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */ 1734 for (k=0; k<mbs; k++){ 1735 v = aa + ai[k]; 1736 vj = aj + ai[k]; 1737 nz = ai[k+1] - ai[k]; 1738 while (nz--) { 1739 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1740 v++;vj++; 1741 } 1742 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1743 } 1744 1745 /* solve U*perm(x) = y by back substitution */ 1746 for (k=mbs-1; k>=0; k--){ 1747 v = aa + ai[k]; 1748 vj = aj + ai[k]; 1749 nz = ai[k+1] - ai[k]; 1750 while (nz--) { 1751 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1752 v++;vj++; 1753 } 1754 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i]; 1755 } 1756 1757 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1758 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1759 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1760 ierr = PetscLogFlops(bb->n*(4*a->nz + A->rmap.N));CHKERRQ(ierr); 1761 } 1762 PetscFunctionReturn(0); 1763 } 1764 1765 /* 1766 Special case where the matrix was ILU(0) factored in the natural 1767 ordering. This eliminates the need for the column and row permutation. 1768 */ 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1771 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1772 { 1773 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1774 PetscErrorCode ierr; 1775 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1776 MatScalar *aa=a->a,*v; 1777 PetscScalar *x,*b,xk; 1778 PetscInt nz,*vj,k; 1779 1780 PetscFunctionBegin; 1781 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1782 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1783 1784 /* solve U^T*D*y = b by forward substitution */ 1785 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1786 for (k=0; k<mbs; k++){ 1787 v = aa + ai[k] + 1; 1788 vj = aj + ai[k] + 1; 1789 xk = x[k]; 1790 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1791 while (nz--) x[*vj++] += (*v++) * xk; 1792 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1793 } 1794 1795 /* solve U*x = y by back substitution */ 1796 for (k=mbs-2; k>=0; k--){ 1797 v = aa + ai[k] + 1; 1798 vj = aj + ai[k] + 1; 1799 xk = x[k]; 1800 nz = ai[k+1] - ai[k] - 1; 1801 while (nz--) xk += (*v++) * x[*vj++]; 1802 x[k] = xk; 1803 } 1804 1805 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1806 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1807 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 1808 PetscFunctionReturn(0); 1809 } 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering" 1813 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1814 { 1815 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1816 PetscErrorCode ierr; 1817 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1818 MatScalar *aa=a->a,*v; 1819 PetscReal diagk; 1820 PetscScalar *x,*b; 1821 PetscInt nz,*vj,k; 1822 1823 PetscFunctionBegin; 1824 /* solve U^T*D^(1/2)*x = b by forward substitution */ 1825 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1826 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1827 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1828 for (k=0; k<mbs; k++){ 1829 v = aa + ai[k] + 1; 1830 vj = aj + ai[k] + 1; 1831 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1832 while (nz--) x[*vj++] += (*v++) * x[k]; 1833 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1834 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]])); 1835 x[k] *= sqrt(diagk); 1836 } 1837 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1838 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1839 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1840 PetscFunctionReturn(0); 1841 } 1842 1843 #undef __FUNCT__ 1844 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering" 1845 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1846 { 1847 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1848 PetscErrorCode ierr; 1849 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1850 MatScalar *aa=a->a,*v; 1851 PetscReal diagk; 1852 PetscScalar *x,*b; 1853 PetscInt nz,*vj,k; 1854 1855 PetscFunctionBegin; 1856 /* solve D^(1/2)*U*x = b by back substitution */ 1857 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1858 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1859 1860 for (k=mbs-1; k>=0; k--){ 1861 v = aa + ai[k] + 1; 1862 vj = aj + ai[k] + 1; 1863 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1864 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1865 x[k] = sqrt(diagk)*b[k]; 1866 nz = ai[k+1] - ai[k] - 1; 1867 while (nz--) x[k] += (*v++) * x[*vj++]; 1868 } 1869 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1870 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1871 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1872 PetscFunctionReturn(0); 1873 } 1874 1875 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 1878 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B) 1879 { 1880 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1881 PetscErrorCode ierr; 1882 PetscInt *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j; 1883 PetscInt *jutmp,bs = A->rmap.bs,bs2=a->bs2; 1884 PetscInt m,reallocs = 0,*levtmp; 1885 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 1886 PetscInt incrlev,*lev,shift,prow,nz; 1887 PetscReal f = info->fill,levels = info->levels; 1888 PetscTruth perm_identity; 1889 1890 PetscFunctionBegin; 1891 /* check whether perm is the identity mapping */ 1892 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1893 1894 if (perm_identity){ 1895 a->permute = PETSC_FALSE; 1896 ai = a->i; aj = a->j; 1897 } else { /* non-trivial permutation */ 1898 a->permute = PETSC_TRUE; 1899 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1900 ai = a->inew; aj = a->jnew; 1901 } 1902 1903 /* initialization */ 1904 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1905 umax = (PetscInt)(f*ai[mbs] + 1); 1906 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 1907 umax += mbs + 1; 1908 shift = mbs + 1; 1909 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 1910 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 1911 iu[0] = mbs + 1; 1912 juidx = mbs + 1; 1913 /* prowl: linked list for pivot row */ 1914 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr); 1915 /* q: linked list for col index */ 1916 q = prowl + mbs; 1917 levtmp = q + mbs; 1918 1919 for (i=0; i<mbs; i++){ 1920 prowl[i] = mbs; 1921 q[i] = 0; 1922 } 1923 1924 /* for each row k */ 1925 for (k=0; k<mbs; k++){ 1926 nzk = 0; 1927 q[k] = mbs; 1928 /* copy current row into linked list */ 1929 nz = ai[rip[k]+1] - ai[rip[k]]; 1930 j = ai[rip[k]]; 1931 while (nz--){ 1932 vj = rip[aj[j++]]; 1933 if (vj > k){ 1934 qm = k; 1935 do { 1936 m = qm; qm = q[m]; 1937 } while(qm < vj); 1938 if (qm == vj) { 1939 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 1940 } 1941 nzk++; 1942 q[m] = vj; 1943 q[vj] = qm; 1944 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1945 } 1946 } 1947 1948 /* modify nonzero structure of k-th row by computing fill-in 1949 for each row prow to be merged in */ 1950 prow = k; 1951 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1952 1953 while (prow < k){ 1954 /* merge row prow into k-th row */ 1955 jmin = iu[prow] + 1; 1956 jmax = iu[prow+1]; 1957 qm = k; 1958 for (j=jmin; j<jmax; j++){ 1959 incrlev = lev[j-shift] + 1; 1960 if (incrlev > levels) continue; 1961 vj = ju[j]; 1962 do { 1963 m = qm; qm = q[m]; 1964 } while (qm < vj); 1965 if (qm != vj){ /* a fill */ 1966 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1967 levtmp[vj] = incrlev; 1968 } else { 1969 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1970 } 1971 } 1972 prow = prowl[prow]; /* next pivot row */ 1973 } 1974 1975 /* add k to row list for first nonzero element in k-th row */ 1976 if (nzk > 1){ 1977 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1978 prowl[k] = prowl[i]; prowl[i] = k; 1979 } 1980 iu[k+1] = iu[k] + nzk; 1981 1982 /* allocate more space to ju and lev if needed */ 1983 if (iu[k+1] > umax) { 1984 /* estimate how much additional space we will need */ 1985 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1986 /* just double the memory each time */ 1987 maxadd = umax; 1988 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1989 umax += maxadd; 1990 1991 /* allocate a longer ju */ 1992 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1993 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 1994 ierr = PetscFree(ju);CHKERRQ(ierr); 1995 ju = jutmp; 1996 1997 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1998 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 1999 ierr = PetscFree(lev);CHKERRQ(ierr); 2000 lev = jutmp; 2001 reallocs += 2; /* count how many times we realloc */ 2002 } 2003 2004 /* save nonzero structure of k-th row in ju */ 2005 i=k; 2006 while (nzk --) { 2007 i = q[i]; 2008 ju[juidx] = i; 2009 lev[juidx-shift] = levtmp[i]; 2010 juidx++; 2011 } 2012 } 2013 2014 #if defined(PETSC_USE_INFO) 2015 if (ai[mbs] != 0) { 2016 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2017 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2018 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2019 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 2020 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2021 } else { 2022 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2023 } 2024 #endif 2025 2026 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2027 ierr = PetscFree(prowl);CHKERRQ(ierr); 2028 ierr = PetscFree(lev);CHKERRQ(ierr); 2029 2030 /* put together the new matrix */ 2031 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 2032 ierr = MatSetSizes(*B,bs*mbs,bs*mbs,bs*mbs,bs*mbs);CHKERRQ(ierr); 2033 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 2034 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 2035 2036 /* ierr = PetscLogObjectParent(*B,iperm);CHKERRQ(ierr); */ 2037 b = (Mat_SeqSBAIJ*)(*B)->data; 2038 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2039 b->singlemalloc = PETSC_FALSE; 2040 b->free_a = PETSC_TRUE; 2041 b->free_ij = PETSC_TRUE; 2042 /* the next line frees the default space generated by the Create() */ 2043 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 2044 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 2045 b->j = ju; 2046 b->i = iu; 2047 b->diag = 0; 2048 b->ilen = 0; 2049 b->imax = 0; 2050 2051 if (b->row) { 2052 ierr = ISDestroy(b->row);CHKERRQ(ierr); 2053 } 2054 if (b->icol) { 2055 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 2056 } 2057 b->row = perm; 2058 b->icol = perm; 2059 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2060 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2061 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2062 /* In b structure: Free imax, ilen, old a, old j. 2063 Allocate idnew, solve_work, new a, new j */ 2064 ierr = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2065 b->maxnz = b->nz = iu[mbs]; 2066 2067 (*B)->factor = FACTOR_CHOLESKY; 2068 (*B)->info.factor_mallocs = reallocs; 2069 (*B)->info.fill_ratio_given = f; 2070 if (ai[mbs] != 0) { 2071 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2072 } else { 2073 (*B)->info.fill_ratio_needed = 0.0; 2074 } 2075 2076 if (perm_identity){ 2077 switch (bs) { 2078 case 1: 2079 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 2080 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2081 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2082 (*B)->ops->solves = MatSolves_SeqSBAIJ_1; 2083 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr); 2084 break; 2085 case 2: 2086 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 2087 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2088 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2089 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 2090 break; 2091 case 3: 2092 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 2093 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2094 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2095 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 2096 break; 2097 case 4: 2098 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 2099 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2100 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2101 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 2102 break; 2103 case 5: 2104 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 2105 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2106 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2107 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 2108 break; 2109 case 6: 2110 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 2111 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2112 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2113 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 2114 break; 2115 case 7: 2116 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 2117 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2118 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2119 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 2120 break; 2121 default: 2122 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 2123 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2124 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2125 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr); 2126 break; 2127 } 2128 } 2129 2130 PetscFunctionReturn(0); 2131 } 2132 2133 #include "petscbt.h" 2134 #include "src/mat/utils/freespace.h" 2135 #undef __FUNCT__ 2136 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 2137 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 2138 { 2139 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2140 Mat_SeqSBAIJ *b; 2141 Mat B; 2142 PetscErrorCode ierr; 2143 PetscTruth perm_identity,free_ij = PETSC_TRUE; 2144 PetscInt bs=A->rmap.bs,am=a->mbs; 2145 PetscInt reallocs=0,*rip,i,*ai,*aj,*ui; 2146 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2147 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 2148 PetscReal fill=info->fill,levels=info->levels,ratio_needed; 2149 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2150 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2151 PetscBT lnkbt; 2152 PetscScalar *ua; 2153 2154 PetscFunctionBegin; 2155 /* 2156 This code originally uses Modified Sparse Row (MSR) storage 2157 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 2158 Then it is rewritten so the factor B takes seqsbaij format. However the associated 2159 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 2160 thus the original code in MSR format is still used for these cases. 2161 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 2162 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 2163 */ 2164 if (bs > 1){ 2165 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 /* check whether perm is the identity mapping */ 2170 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2171 2172 /* special case that simply copies fill pattern */ 2173 if (!levels && perm_identity) { 2174 a->permute = PETSC_FALSE; 2175 /* reuse the column pointers and row offsets for memory savings */ 2176 ui = a->i; 2177 uj = a->j; 2178 free_ij = PETSC_FALSE; 2179 ratio_needed = 1.0; 2180 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2181 if (perm_identity){ 2182 a->permute = PETSC_FALSE; 2183 ai = a->i; aj = a->j; 2184 } else { /* non-trivial permutation */ 2185 a->permute = PETSC_TRUE; 2186 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2187 ai = a->inew; aj = a->jnew; 2188 } 2189 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2190 2191 /* initialization */ 2192 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2193 ui[0] = 0; 2194 2195 /* jl: linked list for storing indices of the pivot rows 2196 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2197 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 2198 il = jl + am; 2199 uj_ptr = (PetscInt**)(il + am); 2200 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 2201 for (i=0; i<am; i++){ 2202 jl[i] = am; il[i] = 0; 2203 } 2204 2205 /* create and initialize a linked list for storing column indices of the active row k */ 2206 nlnk = am + 1; 2207 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2208 2209 /* initial FreeSpace size is fill*(ai[am]+1) */ 2210 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2211 current_space = free_space; 2212 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2213 current_space_lvl = free_space_lvl; 2214 2215 for (k=0; k<am; k++){ /* for each active row k */ 2216 /* initialize lnk by the column indices of row rip[k] */ 2217 nzk = 0; 2218 ncols = ai[rip[k]+1] - ai[rip[k]]; 2219 cols = aj+ai[rip[k]]; 2220 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2221 nzk += nlnk; 2222 2223 /* update lnk by computing fill-in for each pivot row to be merged in */ 2224 prow = jl[k]; /* 1st pivot row */ 2225 2226 while (prow < k){ 2227 nextprow = jl[prow]; 2228 2229 /* merge prow into k-th row */ 2230 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2231 jmax = ui[prow+1]; 2232 ncols = jmax-jmin; 2233 i = jmin - ui[prow]; 2234 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2235 j = *(uj_lvl_ptr[prow] + i - 1); 2236 cols_lvl = uj_lvl_ptr[prow]+i; 2237 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2238 nzk += nlnk; 2239 2240 /* update il and jl for prow */ 2241 if (jmin < jmax){ 2242 il[prow] = jmin; 2243 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2244 } 2245 prow = nextprow; 2246 } 2247 2248 /* if free space is not available, make more free space */ 2249 if (current_space->local_remaining<nzk) { 2250 i = am - k + 1; /* num of unfactored rows */ 2251 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2252 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2253 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2254 reallocs++; 2255 } 2256 2257 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2258 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2259 2260 /* add the k-th row into il and jl */ 2261 if (nzk-1 > 0){ 2262 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2263 jl[k] = jl[i]; jl[i] = k; 2264 il[k] = ui[k] + 1; 2265 } 2266 uj_ptr[k] = current_space->array; 2267 uj_lvl_ptr[k] = current_space_lvl->array; 2268 2269 current_space->array += nzk; 2270 current_space->local_used += nzk; 2271 current_space->local_remaining -= nzk; 2272 current_space_lvl->array += nzk; 2273 current_space_lvl->local_used += nzk; 2274 current_space_lvl->local_remaining -= nzk; 2275 2276 ui[k+1] = ui[k] + nzk; 2277 } 2278 2279 #if defined(PETSC_USE_INFO) 2280 if (ai[am] != 0) { 2281 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2282 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2283 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2284 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2285 } else { 2286 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2287 } 2288 #endif 2289 2290 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2291 ierr = PetscFree(jl);CHKERRQ(ierr); 2292 2293 /* destroy list of free space and other temporary array(s) */ 2294 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2295 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2296 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2297 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2298 if (ai[am] != 0) { 2299 ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2300 } else { 2301 ratio_needed = 0.0; 2302 } 2303 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2304 2305 /* put together the new matrix in MATSEQSBAIJ format */ 2306 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&ua);CHKERRQ(ierr); 2307 ierr = MatCreateSeqSBAIJWithArrays(PETSC_COMM_SELF,1,am,am,ui,uj,ua,fact);CHKERRQ(ierr); 2308 B = *fact; 2309 b = (Mat_SeqSBAIJ*)B->data; 2310 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2311 b->singlemalloc = PETSC_FALSE; 2312 b->free_a = PETSC_TRUE; 2313 b->free_ij = free_ij; 2314 b->diag = 0; 2315 b->ilen = 0; 2316 b->imax = 0; 2317 b->row = perm; 2318 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2319 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2320 b->icol = perm; 2321 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2322 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2323 b->maxnz = b->nz = ui[am]; 2324 2325 B->factor = FACTOR_CHOLESKY; 2326 B->info.factor_mallocs = reallocs; 2327 B->info.fill_ratio_given = fill; 2328 B->info.fill_ratio_needed = ratio_needed; 2329 2330 if (perm_identity){ 2331 switch (bs) { 2332 case 1: 2333 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 2334 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2335 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2336 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr); 2337 break; 2338 case 2: 2339 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 2340 B->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2341 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2342 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 2343 break; 2344 case 3: 2345 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 2346 B->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2347 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2348 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 2349 break; 2350 case 4: 2351 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 2352 B->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2353 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2354 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 2355 break; 2356 case 5: 2357 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 2358 B->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2359 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2360 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 2361 break; 2362 case 6: 2363 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 2364 B->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2365 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2366 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 2367 break; 2368 case 7: 2369 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 2370 B->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2371 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2372 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 2373 break; 2374 default: 2375 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 2376 B->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2377 B->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2378 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr); 2379 break; 2380 } 2381 } 2382 PetscFunctionReturn(0); 2383 } 2384 2385