1 /* 2 Factorization code for SBAIJ format. 3 */ 4 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 #include "src/mat/impls/baij/seq/baij.h" 7 #include "src/inline/ilu.h" 8 #include "src/inline/dot.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering" 12 PetscErrorCode MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 13 { 14 PetscFunctionBegin; 15 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 16 } 17 18 #undef __FUNCT__ 19 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering" 20 PetscErrorCode MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 21 { 22 PetscFunctionBegin; 23 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 24 } 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering" 28 PetscErrorCode MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 29 { 30 PetscFunctionBegin; 31 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 32 } 33 34 #undef __FUNCT__ 35 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering" 36 PetscErrorCode MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 37 { 38 PetscFunctionBegin; 39 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 40 } 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering" 44 PetscErrorCode MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 45 { 46 PetscFunctionBegin; 47 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering" 52 PetscErrorCode MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 53 { 54 PetscFunctionBegin; 55 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering" 60 PetscErrorCode MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 61 { 62 PetscFunctionBegin; 63 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1" 68 PetscErrorCode MatSolveTranspose_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 69 { 70 PetscFunctionBegin; 71 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2" 76 PetscErrorCode MatSolveTranspose_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 77 { 78 PetscFunctionBegin; 79 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3" 84 PetscErrorCode MatSolveTranspose_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 85 { 86 PetscFunctionBegin; 87 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4" 92 PetscErrorCode MatSolveTranspose_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 93 { 94 PetscFunctionBegin; 95 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 96 } 97 98 #undef __FUNCT__ 99 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5" 100 PetscErrorCode MatSolveTranspose_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 101 { 102 PetscFunctionBegin; 103 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 104 } 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6" 108 PetscErrorCode MatSolveTranspose_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 109 { 110 PetscFunctionBegin; 111 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7" 116 PetscErrorCode MatSolveTranspose_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 117 { 118 PetscFunctionBegin; 119 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 120 } 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 124 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 125 { 126 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 127 IS isrow=a->row; 128 int mbs=a->mbs,*ai=a->i,*aj=a->j; 129 PetscErrorCode ierr; 130 int nz,*vj,k,*r,idx,k1; 131 int bs=a->bs,bs2 = a->bs2; 132 MatScalar *aa=a->a,*v,*diag; 133 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 134 135 PetscFunctionBegin; 136 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 137 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 138 t = a->solve_work; 139 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 140 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 141 142 /* solve U^T * D * y = b by forward substitution */ 143 xk = t; 144 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 145 idx = bs*r[k]; 146 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 147 } 148 for (k=0; k<mbs; k++){ 149 v = aa + bs2*ai[k]; 150 xk = t + k*bs; /* Dk*xk = k-th block of x */ 151 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 152 nz = ai[k+1] - ai[k]; 153 vj = aj + ai[k]; 154 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 155 while (nz--) { 156 /* x(:) += U(k,:)^T*(Dk*xk) */ 157 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 158 vj++; xj = t + (*vj)*bs; 159 v += bs2; 160 } 161 /* xk = inv(Dk)*(Dk*xk) */ 162 diag = aa+k*bs2; /* ptr to inv(Dk) */ 163 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 164 } 165 166 /* solve U*x = y by back substitution */ 167 for (k=mbs-1; k>=0; k--){ 168 v = aa + bs2*ai[k]; 169 xk = t + k*bs; /* xk */ 170 nz = ai[k+1] - ai[k]; 171 vj = aj + ai[k]; 172 xj = t + (*vj)*bs; 173 while (nz--) { 174 /* xk += U(k,:)*x(:) */ 175 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 176 vj++; 177 v += bs2; xj = t + (*vj)*bs; 178 } 179 idx = bs*r[k]; 180 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 181 } 182 183 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 184 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 185 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 186 PetscLogFlops(bs2*(2*a->nz + mbs)); 187 PetscFunctionReturn(0); 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 192 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 193 { 194 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 195 PetscErrorCode ierr; 196 int mbs=a->mbs,*ai=a->i,*aj=a->j; 197 int nz,*vj,k; 198 int bs=a->bs,bs2 = a->bs2; 199 MatScalar *aa=a->a,*v,*diag; 200 PetscScalar *x,*xk,*xj,*b,*xk_tmp; 201 202 PetscFunctionBegin; 203 204 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 205 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 206 207 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 208 209 /* solve U^T * D * y = b by forward substitution */ 210 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 211 for (k=0; k<mbs; k++){ 212 v = aa + bs2*ai[k]; 213 xk = x + k*bs; /* Dk*xk = k-th block of x */ 214 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 215 nz = ai[k+1] - ai[k]; 216 vj = aj + ai[k]; 217 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 218 while (nz--) { 219 /* x(:) += U(k,:)^T*(Dk*xk) */ 220 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 221 vj++; xj = x + (*vj)*bs; 222 v += bs2; 223 } 224 /* xk = inv(Dk)*(Dk*xk) */ 225 diag = aa+k*bs2; /* ptr to inv(Dk) */ 226 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 227 } 228 229 /* solve U*x = y by back substitution */ 230 for (k=mbs-1; k>=0; k--){ 231 v = aa + bs2*ai[k]; 232 xk = x + k*bs; /* xk */ 233 nz = ai[k+1] - ai[k]; 234 vj = aj + ai[k]; 235 xj = x + (*vj)*bs; 236 while (nz--) { 237 /* xk += U(k,:)*x(:) */ 238 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 239 vj++; 240 v += bs2; xj = x + (*vj)*bs; 241 } 242 } 243 244 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 245 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 246 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 247 PetscLogFlops(bs2*(2*a->nz + mbs)); 248 PetscFunctionReturn(0); 249 } 250 251 #undef __FUNCT__ 252 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 253 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 254 { 255 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 256 IS isrow=a->row; 257 int mbs=a->mbs,*ai=a->i,*aj=a->j; 258 PetscErrorCode ierr; 259 int nz,*vj,k,*r,idx; 260 MatScalar *aa=a->a,*v,*d; 261 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 262 263 PetscFunctionBegin; 264 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 265 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 266 t = a->solve_work; 267 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 268 269 /* solve U^T * D * y = b by forward substitution */ 270 tp = t; 271 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 272 idx = 7*r[k]; 273 tp[0] = b[idx]; 274 tp[1] = b[idx+1]; 275 tp[2] = b[idx+2]; 276 tp[3] = b[idx+3]; 277 tp[4] = b[idx+4]; 278 tp[5] = b[idx+5]; 279 tp[6] = b[idx+6]; 280 tp += 7; 281 } 282 283 for (k=0; k<mbs; k++){ 284 v = aa + 49*ai[k]; 285 vj = aj + ai[k]; 286 tp = t + k*7; 287 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 288 nz = ai[k+1] - ai[k]; 289 tp = t + (*vj)*7; 290 while (nz--) { 291 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 292 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 293 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 294 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 295 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 296 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 297 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 298 vj++; tp = t + (*vj)*7; 299 v += 49; 300 } 301 302 /* xk = inv(Dk)*(Dk*xk) */ 303 d = aa+k*49; /* ptr to inv(Dk) */ 304 tp = t + k*7; 305 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 306 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 307 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 308 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 309 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 310 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 311 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 312 } 313 314 /* solve U*x = y by back substitution */ 315 for (k=mbs-1; k>=0; k--){ 316 v = aa + 49*ai[k]; 317 vj = aj + ai[k]; 318 tp = t + k*7; 319 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 320 nz = ai[k+1] - ai[k]; 321 322 tp = t + (*vj)*7; 323 while (nz--) { 324 /* xk += U(k,:)*x(:) */ 325 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 326 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 327 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 328 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 329 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 330 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 331 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 332 vj++; tp = t + (*vj)*7; 333 v += 49; 334 } 335 tp = t + k*7; 336 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 337 idx = 7*r[k]; 338 x[idx] = x0; 339 x[idx+1] = x1; 340 x[idx+2] = x2; 341 x[idx+3] = x3; 342 x[idx+4] = x4; 343 x[idx+5] = x5; 344 x[idx+6] = x6; 345 } 346 347 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 348 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 349 PetscLogFlops(49*(2*a->nz + mbs)); 350 PetscFunctionReturn(0); 351 } 352 353 #undef __FUNCT__ 354 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 355 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 356 { 357 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 358 PetscErrorCode ierr; 359 int mbs=a->mbs,*ai=a->i,*aj=a->j; 360 MatScalar *aa=a->a,*v,*d; 361 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6; 362 int nz,*vj,k; 363 364 PetscFunctionBegin; 365 366 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 367 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 368 369 /* solve U^T * D * y = b by forward substitution */ 370 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 371 for (k=0; k<mbs; k++){ 372 v = aa + 49*ai[k]; 373 xp = x + k*7; 374 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 375 nz = ai[k+1] - ai[k]; 376 vj = aj + ai[k]; 377 xp = x + (*vj)*7; 378 while (nz--) { 379 /* x(:) += U(k,:)^T*(Dk*xk) */ 380 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 381 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 382 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 383 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 384 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 385 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 386 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 387 vj++; xp = x + (*vj)*7; 388 v += 49; 389 } 390 /* xk = inv(Dk)*(Dk*xk) */ 391 d = aa+k*49; /* ptr to inv(Dk) */ 392 xp = x + k*7; 393 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 394 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 395 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 396 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 397 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 398 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 399 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 400 } 401 402 /* solve U*x = y by back substitution */ 403 for (k=mbs-1; k>=0; k--){ 404 v = aa + 49*ai[k]; 405 xp = x + k*7; 406 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 407 nz = ai[k+1] - ai[k]; 408 vj = aj + ai[k]; 409 xp = x + (*vj)*7; 410 while (nz--) { 411 /* xk += U(k,:)*x(:) */ 412 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 413 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 414 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 415 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 416 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 417 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 418 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 419 vj++; 420 v += 49; xp = x + (*vj)*7; 421 } 422 xp = x + k*7; 423 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 424 } 425 426 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 427 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 428 PetscLogFlops(49*(2*a->nz + mbs)); 429 PetscFunctionReturn(0); 430 } 431 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 434 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 435 { 436 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 437 IS isrow=a->row; 438 int mbs=a->mbs,*ai=a->i,*aj=a->j; 439 PetscErrorCode ierr; 440 int nz,*vj,k,*r,idx; 441 MatScalar *aa=a->a,*v,*d; 442 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 443 444 PetscFunctionBegin; 445 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 446 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 447 t = a->solve_work; 448 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 449 450 /* solve U^T * D * y = b by forward substitution */ 451 tp = t; 452 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 453 idx = 6*r[k]; 454 tp[0] = b[idx]; 455 tp[1] = b[idx+1]; 456 tp[2] = b[idx+2]; 457 tp[3] = b[idx+3]; 458 tp[4] = b[idx+4]; 459 tp[5] = b[idx+5]; 460 tp += 6; 461 } 462 463 for (k=0; k<mbs; k++){ 464 v = aa + 36*ai[k]; 465 vj = aj + ai[k]; 466 tp = t + k*6; 467 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 468 nz = ai[k+1] - ai[k]; 469 tp = t + (*vj)*6; 470 while (nz--) { 471 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 472 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 473 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 474 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 475 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 476 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 477 vj++; tp = t + (*vj)*6; 478 v += 36; 479 } 480 481 /* xk = inv(Dk)*(Dk*xk) */ 482 d = aa+k*36; /* ptr to inv(Dk) */ 483 tp = t + k*6; 484 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 485 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 486 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 487 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 488 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 489 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 490 } 491 492 /* solve U*x = y by back substitution */ 493 for (k=mbs-1; k>=0; k--){ 494 v = aa + 36*ai[k]; 495 vj = aj + ai[k]; 496 tp = t + k*6; 497 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 498 nz = ai[k+1] - ai[k]; 499 500 tp = t + (*vj)*6; 501 while (nz--) { 502 /* xk += U(k,:)*x(:) */ 503 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 504 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 505 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 506 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 507 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 508 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 509 vj++; tp = t + (*vj)*6; 510 v += 36; 511 } 512 tp = t + k*6; 513 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 514 idx = 6*r[k]; 515 x[idx] = x0; 516 x[idx+1] = x1; 517 x[idx+2] = x2; 518 x[idx+3] = x3; 519 x[idx+4] = x4; 520 x[idx+5] = x5; 521 } 522 523 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 524 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 525 PetscLogFlops(36*(2*a->nz + mbs)); 526 PetscFunctionReturn(0); 527 } 528 529 #undef __FUNCT__ 530 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 531 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 532 { 533 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 534 int mbs=a->mbs,*ai=a->i,*aj=a->j; 535 MatScalar *aa=a->a,*v,*d; 536 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5; 537 PetscErrorCode ierr; 538 int nz,*vj,k; 539 540 PetscFunctionBegin; 541 542 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 543 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 544 545 /* solve U^T * D * y = b by forward substitution */ 546 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 547 for (k=0; k<mbs; k++){ 548 v = aa + 36*ai[k]; 549 xp = x + k*6; 550 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 551 nz = ai[k+1] - ai[k]; 552 vj = aj + ai[k]; 553 xp = x + (*vj)*6; 554 while (nz--) { 555 /* x(:) += U(k,:)^T*(Dk*xk) */ 556 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 557 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 558 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 559 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 560 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 561 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 562 vj++; xp = x + (*vj)*6; 563 v += 36; 564 } 565 /* xk = inv(Dk)*(Dk*xk) */ 566 d = aa+k*36; /* ptr to inv(Dk) */ 567 xp = x + k*6; 568 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 569 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 570 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 571 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 572 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 573 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 574 } 575 576 /* solve U*x = y by back substitution */ 577 for (k=mbs-1; k>=0; k--){ 578 v = aa + 36*ai[k]; 579 xp = x + k*6; 580 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 581 nz = ai[k+1] - ai[k]; 582 vj = aj + ai[k]; 583 xp = x + (*vj)*6; 584 while (nz--) { 585 /* xk += U(k,:)*x(:) */ 586 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 587 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 588 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 589 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 590 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 591 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 592 vj++; 593 v += 36; xp = x + (*vj)*6; 594 } 595 xp = x + k*6; 596 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 597 } 598 599 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 600 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 601 PetscLogFlops(36*(2*a->nz + mbs)); 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 607 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 608 { 609 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 610 IS isrow=a->row; 611 int mbs=a->mbs,*ai=a->i,*aj=a->j; 612 PetscErrorCode ierr; 613 int nz,*vj,k,*r,idx; 614 MatScalar *aa=a->a,*v,*diag; 615 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 616 617 PetscFunctionBegin; 618 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 619 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 620 t = a->solve_work; 621 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 622 623 /* solve U^T * D * y = b by forward substitution */ 624 tp = t; 625 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 626 idx = 5*r[k]; 627 tp[0] = b[idx]; 628 tp[1] = b[idx+1]; 629 tp[2] = b[idx+2]; 630 tp[3] = b[idx+3]; 631 tp[4] = b[idx+4]; 632 tp += 5; 633 } 634 635 for (k=0; k<mbs; k++){ 636 v = aa + 25*ai[k]; 637 vj = aj + ai[k]; 638 tp = t + k*5; 639 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 640 nz = ai[k+1] - ai[k]; 641 642 tp = t + (*vj)*5; 643 while (nz--) { 644 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 645 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 646 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 647 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 648 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 649 vj++; tp = t + (*vj)*5; 650 v += 25; 651 } 652 653 /* xk = inv(Dk)*(Dk*xk) */ 654 diag = aa+k*25; /* ptr to inv(Dk) */ 655 tp = t + k*5; 656 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 657 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 658 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 659 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 660 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 661 } 662 663 /* solve U*x = y by back substitution */ 664 for (k=mbs-1; k>=0; k--){ 665 v = aa + 25*ai[k]; 666 vj = aj + ai[k]; 667 tp = t + k*5; 668 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 669 nz = ai[k+1] - ai[k]; 670 671 tp = t + (*vj)*5; 672 while (nz--) { 673 /* xk += U(k,:)*x(:) */ 674 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 675 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 676 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 677 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 678 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 679 vj++; tp = t + (*vj)*5; 680 v += 25; 681 } 682 tp = t + k*5; 683 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 684 idx = 5*r[k]; 685 x[idx] = x0; 686 x[idx+1] = x1; 687 x[idx+2] = x2; 688 x[idx+3] = x3; 689 x[idx+4] = x4; 690 } 691 692 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 693 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 694 PetscLogFlops(25*(2*a->nz + mbs)); 695 PetscFunctionReturn(0); 696 } 697 698 #undef __FUNCT__ 699 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 700 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 701 { 702 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 703 int mbs=a->mbs,*ai=a->i,*aj=a->j; 704 MatScalar *aa=a->a,*v,*diag; 705 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4; 706 PetscErrorCode ierr; 707 int nz,*vj,k; 708 709 PetscFunctionBegin; 710 711 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 712 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 713 714 /* solve U^T * D * y = b by forward substitution */ 715 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 716 for (k=0; k<mbs; k++){ 717 v = aa + 25*ai[k]; 718 xp = x + k*5; 719 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 720 nz = ai[k+1] - ai[k]; 721 vj = aj + ai[k]; 722 xp = x + (*vj)*5; 723 while (nz--) { 724 /* x(:) += U(k,:)^T*(Dk*xk) */ 725 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 726 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 727 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 728 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 729 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 730 vj++; xp = x + (*vj)*5; 731 v += 25; 732 } 733 /* xk = inv(Dk)*(Dk*xk) */ 734 diag = aa+k*25; /* ptr to inv(Dk) */ 735 xp = x + k*5; 736 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 737 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 738 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 739 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 740 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 741 } 742 743 /* solve U*x = y by back substitution */ 744 for (k=mbs-1; k>=0; k--){ 745 v = aa + 25*ai[k]; 746 xp = x + k*5; 747 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 748 nz = ai[k+1] - ai[k]; 749 vj = aj + ai[k]; 750 xp = x + (*vj)*5; 751 while (nz--) { 752 /* xk += U(k,:)*x(:) */ 753 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 754 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 755 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 756 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 757 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 758 vj++; 759 v += 25; xp = x + (*vj)*5; 760 } 761 xp = x + k*5; 762 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 763 } 764 765 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 766 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 767 PetscLogFlops(25*(2*a->nz + mbs)); 768 PetscFunctionReturn(0); 769 } 770 771 #undef __FUNCT__ 772 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 773 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 774 { 775 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 776 IS isrow=a->row; 777 int mbs=a->mbs,*ai=a->i,*aj=a->j; 778 PetscErrorCode ierr; 779 int nz,*vj,k,*r,idx; 780 MatScalar *aa=a->a,*v,*diag; 781 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 782 783 PetscFunctionBegin; 784 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 785 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 786 t = a->solve_work; 787 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 788 789 /* solve U^T * D * y = b by forward substitution */ 790 tp = t; 791 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 792 idx = 4*r[k]; 793 tp[0] = b[idx]; 794 tp[1] = b[idx+1]; 795 tp[2] = b[idx+2]; 796 tp[3] = b[idx+3]; 797 tp += 4; 798 } 799 800 for (k=0; k<mbs; k++){ 801 v = aa + 16*ai[k]; 802 vj = aj + ai[k]; 803 tp = t + k*4; 804 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 805 nz = ai[k+1] - ai[k]; 806 807 tp = t + (*vj)*4; 808 while (nz--) { 809 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 810 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 811 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 812 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 813 vj++; tp = t + (*vj)*4; 814 v += 16; 815 } 816 817 /* xk = inv(Dk)*(Dk*xk) */ 818 diag = aa+k*16; /* ptr to inv(Dk) */ 819 tp = t + k*4; 820 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 821 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 822 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 823 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 824 } 825 826 /* solve U*x = y by back substitution */ 827 for (k=mbs-1; k>=0; k--){ 828 v = aa + 16*ai[k]; 829 vj = aj + ai[k]; 830 tp = t + k*4; 831 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 832 nz = ai[k+1] - ai[k]; 833 834 tp = t + (*vj)*4; 835 while (nz--) { 836 /* xk += U(k,:)*x(:) */ 837 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 838 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 839 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 840 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 841 vj++; tp = t + (*vj)*4; 842 v += 16; 843 } 844 tp = t + k*4; 845 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 846 idx = 4*r[k]; 847 x[idx] = x0; 848 x[idx+1] = x1; 849 x[idx+2] = x2; 850 x[idx+3] = x3; 851 } 852 853 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 854 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 855 PetscLogFlops(16*(2*a->nz + mbs)); 856 PetscFunctionReturn(0); 857 } 858 859 /* 860 Special case where the matrix was factored in the natural ordering. 861 This eliminates the need for the column and row permutation. 862 */ 863 #undef __FUNCT__ 864 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 865 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 866 { 867 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 868 int mbs=a->mbs,*ai=a->i,*aj=a->j; 869 MatScalar *aa=a->a,*v,*diag; 870 PetscScalar *x,*xp,*b,x0,x1,x2,x3; 871 PetscErrorCode ierr; 872 int nz,*vj,k; 873 874 PetscFunctionBegin; 875 876 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 877 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 878 879 /* solve U^T * D * y = b by forward substitution */ 880 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 881 for (k=0; k<mbs; k++){ 882 v = aa + 16*ai[k]; 883 xp = x + k*4; 884 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 885 nz = ai[k+1] - ai[k]; 886 vj = aj + ai[k]; 887 xp = x + (*vj)*4; 888 while (nz--) { 889 /* x(:) += U(k,:)^T*(Dk*xk) */ 890 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 891 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 892 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 893 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 894 vj++; xp = x + (*vj)*4; 895 v += 16; 896 } 897 /* xk = inv(Dk)*(Dk*xk) */ 898 diag = aa+k*16; /* ptr to inv(Dk) */ 899 xp = x + k*4; 900 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 901 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 902 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 903 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 904 } 905 906 /* solve U*x = y by back substitution */ 907 for (k=mbs-1; k>=0; k--){ 908 v = aa + 16*ai[k]; 909 xp = x + k*4; 910 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 911 nz = ai[k+1] - ai[k]; 912 vj = aj + ai[k]; 913 xp = x + (*vj)*4; 914 while (nz--) { 915 /* xk += U(k,:)*x(:) */ 916 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 917 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 918 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 919 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 920 vj++; 921 v += 16; xp = x + (*vj)*4; 922 } 923 xp = x + k*4; 924 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 925 } 926 927 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 928 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 929 PetscLogFlops(16*(2*a->nz + mbs)); 930 PetscFunctionReturn(0); 931 } 932 933 #undef __FUNCT__ 934 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 935 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 936 { 937 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 938 IS isrow=a->row; 939 int mbs=a->mbs,*ai=a->i,*aj=a->j; 940 PetscErrorCode ierr; 941 int nz,*vj,k,*r,idx; 942 MatScalar *aa=a->a,*v,*diag; 943 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 944 945 PetscFunctionBegin; 946 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 947 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 948 t = a->solve_work; 949 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 950 951 /* solve U^T * D * y = b by forward substitution */ 952 tp = t; 953 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 954 idx = 3*r[k]; 955 tp[0] = b[idx]; 956 tp[1] = b[idx+1]; 957 tp[2] = b[idx+2]; 958 tp += 3; 959 } 960 961 for (k=0; k<mbs; k++){ 962 v = aa + 9*ai[k]; 963 vj = aj + ai[k]; 964 tp = t + k*3; 965 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 966 nz = ai[k+1] - ai[k]; 967 968 tp = t + (*vj)*3; 969 while (nz--) { 970 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 971 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 972 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 973 vj++; tp = t + (*vj)*3; 974 v += 9; 975 } 976 977 /* xk = inv(Dk)*(Dk*xk) */ 978 diag = aa+k*9; /* ptr to inv(Dk) */ 979 tp = t + k*3; 980 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 981 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 982 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 983 } 984 985 /* solve U*x = y by back substitution */ 986 for (k=mbs-1; k>=0; k--){ 987 v = aa + 9*ai[k]; 988 vj = aj + ai[k]; 989 tp = t + k*3; 990 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 991 nz = ai[k+1] - ai[k]; 992 993 tp = t + (*vj)*3; 994 while (nz--) { 995 /* xk += U(k,:)*x(:) */ 996 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 997 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 998 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 999 vj++; tp = t + (*vj)*3; 1000 v += 9; 1001 } 1002 tp = t + k*3; 1003 tp[0] = x0; tp[1] = x1; tp[2] = x2; 1004 idx = 3*r[k]; 1005 x[idx] = x0; 1006 x[idx+1] = x1; 1007 x[idx+2] = x2; 1008 } 1009 1010 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1011 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1012 PetscLogFlops(9*(2*a->nz + mbs)); 1013 PetscFunctionReturn(0); 1014 } 1015 1016 /* 1017 Special case where the matrix was factored in the natural ordering. 1018 This eliminates the need for the column and row permutation. 1019 */ 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 1022 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1023 { 1024 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1025 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1026 MatScalar *aa=a->a,*v,*diag; 1027 PetscScalar *x,*xp,*b,x0,x1,x2; 1028 PetscErrorCode ierr; 1029 int nz,*vj,k; 1030 1031 PetscFunctionBegin; 1032 1033 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1034 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1035 1036 /* solve U^T * D * y = b by forward substitution */ 1037 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1038 for (k=0; k<mbs; k++){ 1039 v = aa + 9*ai[k]; 1040 xp = x + k*3; 1041 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1042 nz = ai[k+1] - ai[k]; 1043 vj = aj + ai[k]; 1044 xp = x + (*vj)*3; 1045 while (nz--) { 1046 /* x(:) += U(k,:)^T*(Dk*xk) */ 1047 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1048 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1049 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1050 vj++; xp = x + (*vj)*3; 1051 v += 9; 1052 } 1053 /* xk = inv(Dk)*(Dk*xk) */ 1054 diag = aa+k*9; /* ptr to inv(Dk) */ 1055 xp = x + k*3; 1056 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1057 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1058 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1059 } 1060 1061 /* solve U*x = y by back substitution */ 1062 for (k=mbs-1; k>=0; k--){ 1063 v = aa + 9*ai[k]; 1064 xp = x + k*3; 1065 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1066 nz = ai[k+1] - ai[k]; 1067 vj = aj + ai[k]; 1068 xp = x + (*vj)*3; 1069 while (nz--) { 1070 /* xk += U(k,:)*x(:) */ 1071 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1072 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1073 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1074 vj++; 1075 v += 9; xp = x + (*vj)*3; 1076 } 1077 xp = x + k*3; 1078 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1079 } 1080 1081 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1082 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1083 PetscLogFlops(9*(2*a->nz + mbs)); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 1089 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 1090 { 1091 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1092 IS isrow=a->row; 1093 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1094 PetscErrorCode ierr; 1095 int nz,*vj,k,k2,*r,idx; 1096 MatScalar *aa=a->a,*v,*diag; 1097 PetscScalar *x,*b,x0,x1,*t; 1098 1099 PetscFunctionBegin; 1100 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1101 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1102 t = a->solve_work; 1103 /* printf("called MatSolve_SeqSBAIJ_2\n"); */ 1104 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1105 1106 /* solve U^T * D * y = perm(b) by forward substitution */ 1107 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1108 idx = 2*r[k]; 1109 t[k*2] = b[idx]; 1110 t[k*2+1] = b[idx+1]; 1111 } 1112 for (k=0; k<mbs; k++){ 1113 v = aa + 4*ai[k]; 1114 vj = aj + ai[k]; 1115 k2 = k*2; 1116 x0 = t[k2]; x1 = t[k2+1]; 1117 nz = ai[k+1] - ai[k]; 1118 while (nz--) { 1119 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1120 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1121 vj++; v += 4; 1122 } 1123 diag = aa+k*4; /* ptr to inv(Dk) */ 1124 t[k2] = diag[0]*x0 + diag[2]*x1; 1125 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1126 } 1127 1128 /* solve U*x = y by back substitution */ 1129 for (k=mbs-1; k>=0; k--){ 1130 v = aa + 4*ai[k]; 1131 vj = aj + ai[k]; 1132 k2 = k*2; 1133 x0 = t[k2]; x1 = t[k2+1]; 1134 nz = ai[k+1] - ai[k]; 1135 while (nz--) { 1136 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1137 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1138 vj++; v += 4; 1139 } 1140 t[k2] = x0; 1141 t[k2+1] = x1; 1142 idx = 2*r[k]; 1143 x[idx] = x0; 1144 x[idx+1] = x1; 1145 } 1146 1147 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1148 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1149 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1150 PetscLogFlops(4*(2*a->nz + mbs)); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 /* 1155 Special case where the matrix was factored in the natural ordering. 1156 This eliminates the need for the column and row permutation. 1157 */ 1158 #undef __FUNCT__ 1159 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1160 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1161 { 1162 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1163 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1164 MatScalar *aa=a->a,*v,*diag; 1165 PetscScalar *x,*b,x0,x1; 1166 PetscErrorCode ierr; 1167 int nz,*vj,k,k2; 1168 1169 PetscFunctionBegin; 1170 1171 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1172 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1173 1174 /* solve U^T * D * y = b by forward substitution */ 1175 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1176 for (k=0; k<mbs; k++){ 1177 v = aa + 4*ai[k]; 1178 vj = aj + ai[k]; 1179 k2 = k*2; 1180 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1181 nz = ai[k+1] - ai[k]; 1182 1183 while (nz--) { 1184 /* x(:) += U(k,:)^T*(Dk*xk) */ 1185 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1186 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1187 vj++; v += 4; 1188 } 1189 /* xk = inv(Dk)*(Dk*xk) */ 1190 diag = aa+k*4; /* ptr to inv(Dk) */ 1191 x[k2] = diag[0]*x0 + diag[2]*x1; 1192 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1193 } 1194 1195 /* solve U*x = y by back substitution */ 1196 for (k=mbs-1; k>=0; k--){ 1197 v = aa + 4*ai[k]; 1198 vj = aj + ai[k]; 1199 k2 = k*2; 1200 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1201 nz = ai[k+1] - ai[k]; 1202 while (nz--) { 1203 /* xk += U(k,:)*x(:) */ 1204 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1205 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1206 vj++; v += 4; 1207 } 1208 x[k2] = x0; 1209 x[k2+1] = x1; 1210 } 1211 1212 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1214 PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */ 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1220 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1221 { 1222 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1223 IS isrow=a->row; 1224 PetscErrorCode ierr; 1225 int mbs=a->mbs,*ai=a->i,*aj=a->j,*rip; 1226 MatScalar *aa=a->a,*v; 1227 PetscScalar *x,*b,xk,*t; 1228 int nz,*vj,k; 1229 1230 PetscFunctionBegin; 1231 if (!mbs) PetscFunctionReturn(0); 1232 1233 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1234 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1235 t = a->solve_work; 1236 1237 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1238 1239 /* solve U^T*D*y = perm(b) by forward substitution */ 1240 for (k=0; k<mbs; k++) t[k] = b[rip[k]]; 1241 for (k=0; k<mbs; k++){ 1242 v = aa + ai[k]; 1243 vj = aj + ai[k]; 1244 xk = t[k]; 1245 nz = ai[k+1] - ai[k]; 1246 while (nz--) t[*vj++] += (*v++) * xk; 1247 t[k] = xk*aa[k]; /* note: aa[k] = 1/D(k) */ 1248 } 1249 1250 /* solve U*x = y by back substitution */ 1251 for (k=mbs-1; k>=0; k--){ 1252 v = aa + ai[k]; 1253 vj = aj + ai[k]; 1254 xk = t[k]; 1255 nz = ai[k+1] - ai[k]; 1256 while (nz--) xk += (*v++) * t[*vj++]; 1257 t[k] = xk; 1258 x[rip[k]] = xk; 1259 } 1260 1261 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1262 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1263 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1264 PetscLogFlops(4*a->nz + A->m); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1270 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1271 { 1272 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 if (a->bs == 1) { 1277 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1278 } else { 1279 IS isrow=a->row; 1280 int mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i; 1281 MatScalar *aa=a->a,*v; 1282 PetscScalar *x,*b,*t; 1283 int nz,*vj,k,n; 1284 if (bb->n > a->solves_work_n) { 1285 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 1286 ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1287 a->solves_work_n = bb->n; 1288 } 1289 n = bb->n; 1290 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1291 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1292 t = a->solves_work; 1293 1294 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1295 1296 /* solve U^T*D*y = perm(b) by forward substitution */ 1297 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */ 1298 for (k=0; k<mbs; k++){ 1299 v = aa + ai[k]; 1300 vj = aj + ai[k]; 1301 nz = ai[k+1] - ai[k]; 1302 while (nz--) { 1303 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1304 v++;vj++; 1305 } 1306 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1307 } 1308 1309 /* solve U*x = y by back substitution */ 1310 for (k=mbs-1; k>=0; k--){ 1311 v = aa + ai[k]; 1312 vj = aj + ai[k]; 1313 nz = ai[k+1] - ai[k]; 1314 while (nz--) { 1315 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1316 v++;vj++; 1317 } 1318 for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i]; 1319 } 1320 1321 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1322 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1323 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1324 PetscLogFlops(bb->n*(4*a->nz + A->m)); 1325 } 1326 PetscFunctionReturn(0); 1327 } 1328 1329 /* 1330 Special case where the matrix was ILU(0) factored in the natural 1331 ordering. This eliminates the need for the column and row permutation. 1332 */ 1333 #undef __FUNCT__ 1334 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1335 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1336 { 1337 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1338 PetscErrorCode ierr; 1339 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1340 MatScalar *aa=a->a,*v; 1341 PetscScalar *x,*b,xk; 1342 int nz,*vj,k; 1343 1344 PetscFunctionBegin; 1345 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1346 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1347 1348 /* solve U^T*D*y = b by forward substitution */ 1349 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1350 for (k=0; k<mbs; k++){ 1351 v = aa + ai[k] + 1; 1352 vj = aj + ai[k] + 1; 1353 xk = x[k]; 1354 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1355 while (nz--) x[*vj++] += (*v++) * xk; 1356 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1357 } 1358 1359 /* solve U*x = y by back substitution */ 1360 for (k=mbs-2; k>=0; k--){ 1361 v = aa + ai[k] + 1; 1362 vj = aj + ai[k] + 1; 1363 xk = x[k]; 1364 nz = ai[k+1] - ai[k] - 1; 1365 while (nz--) xk += (*v++) * x[*vj++]; 1366 x[k] = xk; 1367 } 1368 1369 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1370 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1371 PetscLogFlops(4*a->nz + A->m); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 1378 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B) 1379 { 1380 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1381 PetscErrorCode ierr; 1382 int *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j; 1383 int *jutmp,bs = a->bs,bs2=a->bs2; 1384 int m,realloc = 0,*levtmp; 1385 int *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd,*jl; 1386 int incrlev,*lev,shift,prow,nz; 1387 int *il,ili,nextprow; 1388 PetscReal f = info->fill,levels = info->levels; 1389 PetscTruth perm_identity; 1390 1391 PetscFunctionBegin; 1392 /* check whether perm is the identity mapping */ 1393 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1394 1395 /* special case that simply copies fill pattern */ 1396 if (!levels && perm_identity && bs==1) { 1397 ierr = MatDuplicate_SeqSBAIJ(A,MAT_DO_NOT_COPY_VALUES,B);CHKERRQ(ierr); 1398 (*B)->factor = FACTOR_CHOLESKY; 1399 b = (Mat_SeqSBAIJ*)(*B)->data; 1400 b->row = perm; 1401 b->icol = perm; 1402 b->factor_damping = info->damping; 1403 b->factor_shift = info->shift; 1404 b->factor_zeropivot = info->zeropivot; 1405 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1406 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1407 ierr = PetscMalloc(((*B)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1408 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1409 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1410 PetscFunctionReturn(0); 1411 } 1412 1413 /* --- inplace icc(levels), levels>0, ie, *B has same data structure as A --- */ 1414 if (levels > 0 && perm_identity && bs==1 ){ 1415 if (!perm_identity) a->permute = PETSC_TRUE; 1416 1417 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1418 1419 if (perm_identity){ /* without permutation */ 1420 ai = a->i; aj = a->j; 1421 } else { /* non-trivial permutation */ 1422 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 1423 ai = a->inew; aj = a->jnew; 1424 } 1425 1426 /* initialization */ 1427 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 1428 umax = (int)(f*ai[mbs] + 1); 1429 ierr = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr); 1430 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 1431 iu[0] = 0; 1432 juidx = 0; /* index for ju */ 1433 ierr = PetscMalloc((4*mbs+1)*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */ 1434 q = jl + mbs; /* linked list for col index of active row */ 1435 levtmp = q + mbs; 1436 il = levtmp + mbs; 1437 for (i=0; i<mbs; i++){ 1438 jl[i] = mbs; 1439 q[i] = 0; 1440 il[i] = 0; 1441 } 1442 1443 /* for each row k */ 1444 for (k=0; k<mbs; k++){ 1445 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 1446 q[k] = mbs; 1447 /* initialize nonzero structure of k-th row to row rip[k] of A */ 1448 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 1449 jmax = ai[rip[k]+1]; 1450 for (j=jmin; j<jmax; j++){ 1451 vj = rip[aj[j]]; /* col. value */ 1452 if(vj > k){ 1453 qm = k; 1454 do { 1455 m = qm; qm = q[m]; 1456 } while(qm < vj); 1457 if (qm == vj) { 1458 SETERRQ(1," error: duplicate entry in A\n"); 1459 } 1460 nzk++; 1461 q[m] = vj; 1462 q[vj] = qm; 1463 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1464 } /* if(vj > k) */ 1465 } /* for (j=jmin; j<jmax; j++) */ 1466 1467 /* modify nonzero structure of k-th row by computing fill-in 1468 for each row i to be merged in */ 1469 prow = k; 1470 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 1471 1472 while (prow < k){ 1473 nextprow = jl[prow]; 1474 /* merge row prow into k-th row */ 1475 ili = il[prow]; 1476 jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */ 1477 jmax = iu[prow+1]; 1478 qm = k; 1479 for (j=jmin; j<jmax; j++){ 1480 vj = ju[j]; 1481 incrlev = lev[j] + 1; 1482 if (incrlev > levels) continue; 1483 do { 1484 m = qm; qm = q[m]; 1485 } while (qm < vj); 1486 if (qm != vj){ /* a fill */ 1487 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1488 levtmp[vj] = incrlev; 1489 } else { 1490 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1491 } 1492 } 1493 if (jmin < jmax){ /* update il and jl */ 1494 il[prow] = jmin; 1495 j = ju[jmin]; 1496 jl[prow] = jl[j]; jl[j] = prow; 1497 } 1498 prow = nextprow; 1499 } 1500 1501 /* add the first nonzero element in U(k, k+1:mbs-1) to jl */ 1502 if (nzk > 0){ 1503 i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1504 jl[k] = jl[i]; jl[i] = k; 1505 il[k] = iu[k] + 1; 1506 } 1507 iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */ 1508 1509 /* allocate more space to ju if needed */ 1510 if (iu[k+1] > umax) { 1511 /* estimate how much additional space we will need */ 1512 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1513 /* just double the memory each time */ 1514 maxadd = umax; 1515 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1516 umax += maxadd; 1517 1518 /* allocate a longer ju */ 1519 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1520 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 1521 ierr = PetscFree(ju);CHKERRQ(ierr); 1522 ju = jutmp; 1523 1524 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1525 ierr = PetscMemcpy(jutmp,lev,(iu[k])*sizeof(int));CHKERRQ(ierr); 1526 ierr = PetscFree(lev);CHKERRQ(ierr); 1527 lev = jutmp; 1528 realloc += 2; /* count how many times we realloc */ 1529 } 1530 1531 /* save nonzero structure of k-th row in ju */ 1532 ju[juidx] = k; /* diag[k] */ 1533 lev[juidx] = 0; 1534 juidx++; 1535 i = k; 1536 while (nzk --) { 1537 i = q[i]; 1538 ju[juidx] = i; 1539 lev[juidx] = levtmp[i]; 1540 juidx++; 1541 } 1542 } /* end of for (k=0; k<mbs; k++) */ 1543 1544 if (ai[mbs] != 0) { 1545 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1546 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1547 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1548 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 1549 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 1550 } else { 1551 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 1552 } 1553 1554 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1555 ierr = PetscFree(jl);CHKERRQ(ierr); 1556 ierr = PetscFree(lev);CHKERRQ(ierr); 1557 1558 /* put together the new matrix */ 1559 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 1560 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 1561 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1562 1563 /* PetscLogObjectParent(*B,iperm); */ 1564 b = (Mat_SeqSBAIJ*)(*B)->data; 1565 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1566 b->singlemalloc = PETSC_FALSE; 1567 /* the next line frees the default space generated by the Create() */ 1568 ierr = PetscFree(b->a);CHKERRQ(ierr); 1569 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1570 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1571 b->j = ju; 1572 b->i = iu; 1573 b->diag = 0; 1574 b->ilen = 0; 1575 b->imax = 0; 1576 b->row = perm; 1577 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1578 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1579 b->icol = perm; 1580 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1581 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1582 /* In b structure: Free imax, ilen, old a, old j. 1583 Allocate idnew, solve_work, new a, new j */ 1584 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 1585 b->maxnz = b->nz = iu[mbs]; 1586 b->factor_damping = info->damping; 1587 b->factor_shift = info->shift; 1588 b->factor_zeropivot = info->zeropivot; 1589 1590 (*B)->factor = FACTOR_CHOLESKY; 1591 (*B)->info.factor_mallocs = realloc; 1592 (*B)->info.fill_ratio_given = f; 1593 if (ai[mbs] != 0) { 1594 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1595 } else { 1596 (*B)->info.fill_ratio_needed = 0.0; 1597 } 1598 1599 1600 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1601 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1602 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 1603 1604 PetscFunctionReturn(0); 1605 } /* end of if (levels > 0 && perm_identity && bs==1 ) */ 1606 1607 if (!perm_identity) a->permute = PETSC_TRUE; 1608 if (perm_identity){ 1609 ai = a->i; aj = a->j; 1610 } else { /* non-trivial permutation */ 1611 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1612 ai = a->inew; aj = a->jnew; 1613 } 1614 1615 /* initialization */ 1616 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1617 umax = (int)(f*ai[mbs] + 1); 1618 ierr = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr); 1619 umax += mbs + 1; 1620 shift = mbs + 1; 1621 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 1622 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 1623 iu[0] = mbs + 1; 1624 juidx = mbs + 1; 1625 /* prowl: linked list for pivot row */ 1626 ierr = PetscMalloc((3*mbs+1)*sizeof(int),&prowl);CHKERRQ(ierr); 1627 /* q: linked list for col index */ 1628 q = prowl + mbs; 1629 levtmp = q + mbs; 1630 1631 for (i=0; i<mbs; i++){ 1632 prowl[i] = mbs; 1633 q[i] = 0; 1634 } 1635 1636 /* for each row k */ 1637 for (k=0; k<mbs; k++){ 1638 nzk = 0; 1639 q[k] = mbs; 1640 /* copy current row into linked list */ 1641 nz = ai[rip[k]+1] - ai[rip[k]]; 1642 j = ai[rip[k]]; 1643 while (nz--){ 1644 vj = rip[aj[j++]]; 1645 if (vj > k){ 1646 qm = k; 1647 do { 1648 m = qm; qm = q[m]; 1649 } while(qm < vj); 1650 if (qm == vj) { 1651 SETERRQ(1," error: duplicate entry in A\n"); 1652 } 1653 nzk++; 1654 q[m] = vj; 1655 q[vj] = qm; 1656 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1657 } 1658 } 1659 1660 /* modify nonzero structure of k-th row by computing fill-in 1661 for each row prow to be merged in */ 1662 prow = k; 1663 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1664 1665 while (prow < k){ 1666 /* merge row prow into k-th row */ 1667 jmin = iu[prow] + 1; 1668 jmax = iu[prow+1]; 1669 qm = k; 1670 for (j=jmin; j<jmax; j++){ 1671 incrlev = lev[j-shift] + 1; 1672 if (incrlev > levels) continue; 1673 1674 vj = ju[j]; 1675 do { 1676 m = qm; qm = q[m]; 1677 } while (qm < vj); 1678 if (qm != vj){ /* a fill */ 1679 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1680 levtmp[vj] = incrlev; 1681 } else { 1682 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1683 } 1684 } 1685 prow = prowl[prow]; /* next pivot row */ 1686 } 1687 1688 /* add k to row list for first nonzero element in k-th row */ 1689 if (nzk > 1){ 1690 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1691 prowl[k] = prowl[i]; prowl[i] = k; 1692 } 1693 iu[k+1] = iu[k] + nzk; 1694 1695 /* allocate more space to ju and lev if needed */ 1696 if (iu[k+1] > umax) { 1697 /* estimate how much additional space we will need */ 1698 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1699 /* just double the memory each time */ 1700 maxadd = umax; 1701 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1702 umax += maxadd; 1703 1704 /* allocate a longer ju */ 1705 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1706 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 1707 ierr = PetscFree(ju);CHKERRQ(ierr); 1708 ju = jutmp; 1709 1710 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1711 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(int));CHKERRQ(ierr); 1712 ierr = PetscFree(lev);CHKERRQ(ierr); 1713 lev = jutmp; 1714 realloc += 2; /* count how many times we realloc */ 1715 } 1716 1717 /* save nonzero structure of k-th row in ju */ 1718 i=k; 1719 while (nzk --) { 1720 i = q[i]; 1721 ju[juidx] = i; 1722 lev[juidx-shift] = levtmp[i]; 1723 juidx++; 1724 } 1725 } 1726 1727 if (ai[mbs] != 0) { 1728 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1729 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1730 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af); 1731 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af); 1732 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n"); 1733 } else { 1734 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 1735 } 1736 1737 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1738 ierr = PetscFree(prowl);CHKERRQ(ierr); 1739 ierr = PetscFree(lev);CHKERRQ(ierr); 1740 1741 /* put together the new matrix */ 1742 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 1743 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 1744 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1745 1746 /* PetscLogObjectParent(*B,iperm); */ 1747 b = (Mat_SeqSBAIJ*)(*B)->data; 1748 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1749 b->singlemalloc = PETSC_FALSE; 1750 /* the next line frees the default space generated by the Create() */ 1751 ierr = PetscFree(b->a);CHKERRQ(ierr); 1752 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1753 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1754 b->j = ju; 1755 b->i = iu; 1756 b->diag = 0; 1757 b->ilen = 0; 1758 b->imax = 0; 1759 1760 if (b->row) { 1761 ierr = ISDestroy(b->row);CHKERRQ(ierr); 1762 } 1763 if (b->icol) { 1764 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 1765 } 1766 b->row = perm; 1767 b->icol = perm; 1768 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1769 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1770 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1771 /* In b structure: Free imax, ilen, old a, old j. 1772 Allocate idnew, solve_work, new a, new j */ 1773 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 1774 b->maxnz = b->nz = iu[mbs]; 1775 1776 (*B)->factor = FACTOR_CHOLESKY; 1777 (*B)->info.factor_mallocs = realloc; 1778 (*B)->info.fill_ratio_given = f; 1779 if (ai[mbs] != 0) { 1780 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1781 } else { 1782 (*B)->info.fill_ratio_needed = 0.0; 1783 } 1784 1785 if (perm_identity){ 1786 switch (bs) { 1787 case 1: 1788 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1789 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1790 (*B)->ops->solves = MatSolves_SeqSBAIJ_1; 1791 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n"); 1792 break; 1793 case 2: 1794 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1795 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1796 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1797 break; 1798 case 3: 1799 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1800 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1801 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 1802 break; 1803 case 4: 1804 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1805 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1806 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1807 break; 1808 case 5: 1809 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1810 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1811 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1812 break; 1813 case 6: 1814 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1815 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1816 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1817 break; 1818 case 7: 1819 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1820 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1821 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1822 break; 1823 default: 1824 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 1825 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1826 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 1827 break; 1828 } 1829 } 1830 1831 PetscFunctionReturn(0); 1832 } 1833 1834 1835 1836