1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for SBAIJ format. 5 */ 6 7 #include "src/mat/impls/sbaij/seq/sbaij.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/inline/ilu.h" 10 #include "src/inline/dot.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 14 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 15 { 16 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 17 IS isrow=a->row; 18 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 19 PetscErrorCode ierr; 20 PetscInt nz,*vj,k,*r,idx,k1; 21 PetscInt bs=A->bs,bs2 = a->bs2; 22 MatScalar *aa=a->a,*v,*diag; 23 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 24 25 PetscFunctionBegin; 26 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28 t = a->solve_work; 29 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 30 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 31 32 /* solve U^T * D * y = b by forward substitution */ 33 xk = t; 34 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 35 idx = bs*r[k]; 36 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 37 } 38 for (k=0; k<mbs; k++){ 39 v = aa + bs2*ai[k]; 40 xk = t + k*bs; /* Dk*xk = k-th block of x */ 41 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 42 nz = ai[k+1] - ai[k]; 43 vj = aj + ai[k]; 44 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 45 while (nz--) { 46 /* x(:) += U(k,:)^T*(Dk*xk) */ 47 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 48 vj++; xj = t + (*vj)*bs; 49 v += bs2; 50 } 51 /* xk = inv(Dk)*(Dk*xk) */ 52 diag = aa+k*bs2; /* ptr to inv(Dk) */ 53 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 54 } 55 56 /* solve U*x = y by back substitution */ 57 for (k=mbs-1; k>=0; k--){ 58 v = aa + bs2*ai[k]; 59 xk = t + k*bs; /* xk */ 60 nz = ai[k+1] - ai[k]; 61 vj = aj + ai[k]; 62 xj = t + (*vj)*bs; 63 while (nz--) { 64 /* xk += U(k,:)*x(:) */ 65 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 66 vj++; 67 v += bs2; xj = t + (*vj)*bs; 68 } 69 idx = bs*r[k]; 70 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 71 } 72 73 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 74 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 75 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 76 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 77 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 83 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 84 { 85 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 86 PetscErrorCode ierr; 87 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 88 PetscInt nz,*vj,k; 89 PetscInt bs=A->bs,bs2 = a->bs2; 90 MatScalar *aa=a->a,*v,*diag; 91 PetscScalar *x,*xk,*xj,*b,*xk_tmp; 92 93 PetscFunctionBegin; 94 95 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 96 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 97 98 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 99 100 /* solve U^T * D * y = b by forward substitution */ 101 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 102 for (k=0; k<mbs; k++){ 103 v = aa + bs2*ai[k]; 104 xk = x + k*bs; /* Dk*xk = k-th block of x */ 105 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 106 nz = ai[k+1] - ai[k]; 107 vj = aj + ai[k]; 108 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 109 while (nz--) { 110 /* x(:) += U(k,:)^T*(Dk*xk) */ 111 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 112 vj++; xj = x + (*vj)*bs; 113 v += bs2; 114 } 115 /* xk = inv(Dk)*(Dk*xk) */ 116 diag = aa+k*bs2; /* ptr to inv(Dk) */ 117 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 118 } 119 120 /* solve U*x = y by back substitution */ 121 for (k=mbs-1; k>=0; k--){ 122 v = aa + bs2*ai[k]; 123 xk = x + k*bs; /* xk */ 124 nz = ai[k+1] - ai[k]; 125 vj = aj + ai[k]; 126 xj = x + (*vj)*bs; 127 while (nz--) { 128 /* xk += U(k,:)*x(:) */ 129 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 130 vj++; 131 v += bs2; xj = x + (*vj)*bs; 132 } 133 } 134 135 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 136 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 137 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 138 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 144 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 145 { 146 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 147 IS isrow=a->row; 148 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 149 PetscErrorCode ierr; 150 PetscInt nz,*vj,k,*r,idx; 151 MatScalar *aa=a->a,*v,*d; 152 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 153 154 PetscFunctionBegin; 155 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 156 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 157 t = a->solve_work; 158 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 159 160 /* solve U^T * D * y = b by forward substitution */ 161 tp = t; 162 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 163 idx = 7*r[k]; 164 tp[0] = b[idx]; 165 tp[1] = b[idx+1]; 166 tp[2] = b[idx+2]; 167 tp[3] = b[idx+3]; 168 tp[4] = b[idx+4]; 169 tp[5] = b[idx+5]; 170 tp[6] = b[idx+6]; 171 tp += 7; 172 } 173 174 for (k=0; k<mbs; k++){ 175 v = aa + 49*ai[k]; 176 vj = aj + ai[k]; 177 tp = t + k*7; 178 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 179 nz = ai[k+1] - ai[k]; 180 tp = t + (*vj)*7; 181 while (nz--) { 182 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 183 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 184 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 185 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 186 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 187 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 188 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 189 vj++; tp = t + (*vj)*7; 190 v += 49; 191 } 192 193 /* xk = inv(Dk)*(Dk*xk) */ 194 d = aa+k*49; /* ptr to inv(Dk) */ 195 tp = t + k*7; 196 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 197 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 198 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 199 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 200 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 201 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 202 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 203 } 204 205 /* solve U*x = y by back substitution */ 206 for (k=mbs-1; k>=0; k--){ 207 v = aa + 49*ai[k]; 208 vj = aj + ai[k]; 209 tp = t + k*7; 210 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 211 nz = ai[k+1] - ai[k]; 212 213 tp = t + (*vj)*7; 214 while (nz--) { 215 /* xk += U(k,:)*x(:) */ 216 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 217 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 218 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 219 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 220 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 221 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 222 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 223 vj++; tp = t + (*vj)*7; 224 v += 49; 225 } 226 tp = t + k*7; 227 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 228 idx = 7*r[k]; 229 x[idx] = x0; 230 x[idx+1] = x1; 231 x[idx+2] = x2; 232 x[idx+3] = x3; 233 x[idx+4] = x4; 234 x[idx+5] = x5; 235 x[idx+6] = x6; 236 } 237 238 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 239 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 240 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 241 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 242 PetscFunctionReturn(0); 243 } 244 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 247 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 248 { 249 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 250 PetscErrorCode ierr; 251 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 252 MatScalar *aa=a->a,*v,*d; 253 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6; 254 PetscInt nz,*vj,k; 255 256 PetscFunctionBegin; 257 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 258 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 259 260 /* solve U^T * D * y = b by forward substitution */ 261 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 262 for (k=0; k<mbs; k++){ 263 v = aa + 49*ai[k]; 264 xp = x + k*7; 265 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 266 nz = ai[k+1] - ai[k]; 267 vj = aj + ai[k]; 268 xp = x + (*vj)*7; 269 while (nz--) { 270 /* x(:) += U(k,:)^T*(Dk*xk) */ 271 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 272 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 273 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 274 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 275 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 276 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 277 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 278 vj++; xp = x + (*vj)*7; 279 v += 49; 280 } 281 /* xk = inv(Dk)*(Dk*xk) */ 282 d = aa+k*49; /* ptr to inv(Dk) */ 283 xp = x + k*7; 284 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 285 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 286 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 287 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 288 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 289 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 290 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 291 } 292 293 /* solve U*x = y by back substitution */ 294 for (k=mbs-1; k>=0; k--){ 295 v = aa + 49*ai[k]; 296 xp = x + k*7; 297 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 298 nz = ai[k+1] - ai[k]; 299 vj = aj + ai[k]; 300 xp = x + (*vj)*7; 301 while (nz--) { 302 /* xk += U(k,:)*x(:) */ 303 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 304 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 305 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 306 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 307 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 308 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 309 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 310 vj++; 311 v += 49; xp = x + (*vj)*7; 312 } 313 xp = x + k*7; 314 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 315 } 316 317 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 318 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 319 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 320 PetscFunctionReturn(0); 321 } 322 323 #undef __FUNCT__ 324 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 325 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 326 { 327 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 328 IS isrow=a->row; 329 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 330 PetscErrorCode ierr; 331 PetscInt nz,*vj,k,*r,idx; 332 MatScalar *aa=a->a,*v,*d; 333 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 334 335 PetscFunctionBegin; 336 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 337 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 338 t = a->solve_work; 339 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 340 341 /* solve U^T * D * y = b by forward substitution */ 342 tp = t; 343 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 344 idx = 6*r[k]; 345 tp[0] = b[idx]; 346 tp[1] = b[idx+1]; 347 tp[2] = b[idx+2]; 348 tp[3] = b[idx+3]; 349 tp[4] = b[idx+4]; 350 tp[5] = b[idx+5]; 351 tp += 6; 352 } 353 354 for (k=0; k<mbs; k++){ 355 v = aa + 36*ai[k]; 356 vj = aj + ai[k]; 357 tp = t + k*6; 358 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 359 nz = ai[k+1] - ai[k]; 360 tp = t + (*vj)*6; 361 while (nz--) { 362 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 363 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 364 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 365 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 366 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 367 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 368 vj++; tp = t + (*vj)*6; 369 v += 36; 370 } 371 372 /* xk = inv(Dk)*(Dk*xk) */ 373 d = aa+k*36; /* ptr to inv(Dk) */ 374 tp = t + k*6; 375 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 376 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 377 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 378 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 379 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 380 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 381 } 382 383 /* solve U*x = y by back substitution */ 384 for (k=mbs-1; k>=0; k--){ 385 v = aa + 36*ai[k]; 386 vj = aj + ai[k]; 387 tp = t + k*6; 388 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 389 nz = ai[k+1] - ai[k]; 390 391 tp = t + (*vj)*6; 392 while (nz--) { 393 /* xk += U(k,:)*x(:) */ 394 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 395 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 396 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 397 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 398 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 399 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 400 vj++; tp = t + (*vj)*6; 401 v += 36; 402 } 403 tp = t + k*6; 404 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 405 idx = 6*r[k]; 406 x[idx] = x0; 407 x[idx+1] = x1; 408 x[idx+2] = x2; 409 x[idx+3] = x3; 410 x[idx+4] = x4; 411 x[idx+5] = x5; 412 } 413 414 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 415 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 416 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 417 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 423 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 424 { 425 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 426 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 427 MatScalar *aa=a->a,*v,*d; 428 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5; 429 PetscErrorCode ierr; 430 PetscInt nz,*vj,k; 431 432 PetscFunctionBegin; 433 434 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 435 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 436 437 /* solve U^T * D * y = b by forward substitution */ 438 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 439 for (k=0; k<mbs; k++){ 440 v = aa + 36*ai[k]; 441 xp = x + k*6; 442 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 443 nz = ai[k+1] - ai[k]; 444 vj = aj + ai[k]; 445 xp = x + (*vj)*6; 446 while (nz--) { 447 /* x(:) += U(k,:)^T*(Dk*xk) */ 448 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 449 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 450 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 451 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 452 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 453 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 454 vj++; xp = x + (*vj)*6; 455 v += 36; 456 } 457 /* xk = inv(Dk)*(Dk*xk) */ 458 d = aa+k*36; /* ptr to inv(Dk) */ 459 xp = x + k*6; 460 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 461 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 462 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 463 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 464 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 465 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 466 } 467 468 /* solve U*x = y by back substitution */ 469 for (k=mbs-1; k>=0; k--){ 470 v = aa + 36*ai[k]; 471 xp = x + k*6; 472 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 473 nz = ai[k+1] - ai[k]; 474 vj = aj + ai[k]; 475 xp = x + (*vj)*6; 476 while (nz--) { 477 /* xk += U(k,:)*x(:) */ 478 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 479 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 480 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 481 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 482 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 483 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 484 vj++; 485 v += 36; xp = x + (*vj)*6; 486 } 487 xp = x + k*6; 488 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 489 } 490 491 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 492 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 493 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 499 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 500 { 501 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 502 IS isrow=a->row; 503 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 504 PetscErrorCode ierr; 505 PetscInt nz,*vj,k,*r,idx; 506 MatScalar *aa=a->a,*v,*diag; 507 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 508 509 PetscFunctionBegin; 510 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 511 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 512 t = a->solve_work; 513 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 514 515 /* solve U^T * D * y = b by forward substitution */ 516 tp = t; 517 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 518 idx = 5*r[k]; 519 tp[0] = b[idx]; 520 tp[1] = b[idx+1]; 521 tp[2] = b[idx+2]; 522 tp[3] = b[idx+3]; 523 tp[4] = b[idx+4]; 524 tp += 5; 525 } 526 527 for (k=0; k<mbs; k++){ 528 v = aa + 25*ai[k]; 529 vj = aj + ai[k]; 530 tp = t + k*5; 531 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 532 nz = ai[k+1] - ai[k]; 533 534 tp = t + (*vj)*5; 535 while (nz--) { 536 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 537 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 538 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 539 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 540 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 541 vj++; tp = t + (*vj)*5; 542 v += 25; 543 } 544 545 /* xk = inv(Dk)*(Dk*xk) */ 546 diag = aa+k*25; /* ptr to inv(Dk) */ 547 tp = t + k*5; 548 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 549 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 550 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 551 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 552 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 553 } 554 555 /* solve U*x = y by back substitution */ 556 for (k=mbs-1; k>=0; k--){ 557 v = aa + 25*ai[k]; 558 vj = aj + ai[k]; 559 tp = t + k*5; 560 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 561 nz = ai[k+1] - ai[k]; 562 563 tp = t + (*vj)*5; 564 while (nz--) { 565 /* xk += U(k,:)*x(:) */ 566 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 567 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 568 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 569 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 570 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 571 vj++; tp = t + (*vj)*5; 572 v += 25; 573 } 574 tp = t + k*5; 575 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 576 idx = 5*r[k]; 577 x[idx] = x0; 578 x[idx+1] = x1; 579 x[idx+2] = x2; 580 x[idx+3] = x3; 581 x[idx+4] = x4; 582 } 583 584 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 585 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 586 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 587 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 593 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 594 { 595 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 596 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 597 MatScalar *aa=a->a,*v,*diag; 598 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4; 599 PetscErrorCode ierr; 600 PetscInt nz,*vj,k; 601 602 PetscFunctionBegin; 603 604 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 605 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 606 607 /* solve U^T * D * y = b by forward substitution */ 608 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 609 for (k=0; k<mbs; k++){ 610 v = aa + 25*ai[k]; 611 xp = x + k*5; 612 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 613 nz = ai[k+1] - ai[k]; 614 vj = aj + ai[k]; 615 xp = x + (*vj)*5; 616 while (nz--) { 617 /* x(:) += U(k,:)^T*(Dk*xk) */ 618 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 619 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 620 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 621 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 622 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 623 vj++; xp = x + (*vj)*5; 624 v += 25; 625 } 626 /* xk = inv(Dk)*(Dk*xk) */ 627 diag = aa+k*25; /* ptr to inv(Dk) */ 628 xp = x + k*5; 629 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 630 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 631 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 632 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 633 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 634 } 635 636 /* solve U*x = y by back substitution */ 637 for (k=mbs-1; k>=0; k--){ 638 v = aa + 25*ai[k]; 639 xp = x + k*5; 640 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 641 nz = ai[k+1] - ai[k]; 642 vj = aj + ai[k]; 643 xp = x + (*vj)*5; 644 while (nz--) { 645 /* xk += U(k,:)*x(:) */ 646 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 647 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 648 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 649 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 650 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 651 vj++; 652 v += 25; xp = x + (*vj)*5; 653 } 654 xp = x + k*5; 655 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 656 } 657 658 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 659 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 660 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } 663 664 #undef __FUNCT__ 665 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 666 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 667 { 668 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 669 IS isrow=a->row; 670 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 671 PetscErrorCode ierr; 672 PetscInt nz,*vj,k,*r,idx; 673 MatScalar *aa=a->a,*v,*diag; 674 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 675 676 PetscFunctionBegin; 677 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 678 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 679 t = a->solve_work; 680 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 681 682 /* solve U^T * D * y = b by forward substitution */ 683 tp = t; 684 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 685 idx = 4*r[k]; 686 tp[0] = b[idx]; 687 tp[1] = b[idx+1]; 688 tp[2] = b[idx+2]; 689 tp[3] = b[idx+3]; 690 tp += 4; 691 } 692 693 for (k=0; k<mbs; k++){ 694 v = aa + 16*ai[k]; 695 vj = aj + ai[k]; 696 tp = t + k*4; 697 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 698 nz = ai[k+1] - ai[k]; 699 700 tp = t + (*vj)*4; 701 while (nz--) { 702 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 703 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 704 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 705 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 706 vj++; tp = t + (*vj)*4; 707 v += 16; 708 } 709 710 /* xk = inv(Dk)*(Dk*xk) */ 711 diag = aa+k*16; /* ptr to inv(Dk) */ 712 tp = t + k*4; 713 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 714 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 715 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 716 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 717 } 718 719 /* solve U*x = y by back substitution */ 720 for (k=mbs-1; k>=0; k--){ 721 v = aa + 16*ai[k]; 722 vj = aj + ai[k]; 723 tp = t + k*4; 724 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 725 nz = ai[k+1] - ai[k]; 726 727 tp = t + (*vj)*4; 728 while (nz--) { 729 /* xk += U(k,:)*x(:) */ 730 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 731 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 732 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 733 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 734 vj++; tp = t + (*vj)*4; 735 v += 16; 736 } 737 tp = t + k*4; 738 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 739 idx = 4*r[k]; 740 x[idx] = x0; 741 x[idx+1] = x1; 742 x[idx+2] = x2; 743 x[idx+3] = x3; 744 } 745 746 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 747 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 748 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 749 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 753 /* 754 Special case where the matrix was factored in the natural ordering. 755 This eliminates the need for the column and row permutation. 756 */ 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 759 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 760 { 761 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 762 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 763 MatScalar *aa=a->a,*v,*diag; 764 PetscScalar *x,*xp,*b,x0,x1,x2,x3; 765 PetscErrorCode ierr; 766 PetscInt nz,*vj,k; 767 768 PetscFunctionBegin; 769 770 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 771 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 772 773 /* solve U^T * D * y = b by forward substitution */ 774 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 775 for (k=0; k<mbs; k++){ 776 v = aa + 16*ai[k]; 777 xp = x + k*4; 778 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 779 nz = ai[k+1] - ai[k]; 780 vj = aj + ai[k]; 781 xp = x + (*vj)*4; 782 while (nz--) { 783 /* x(:) += U(k,:)^T*(Dk*xk) */ 784 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 785 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 786 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 787 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 788 vj++; xp = x + (*vj)*4; 789 v += 16; 790 } 791 /* xk = inv(Dk)*(Dk*xk) */ 792 diag = aa+k*16; /* ptr to inv(Dk) */ 793 xp = x + k*4; 794 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 795 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 796 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 797 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 798 } 799 800 /* solve U*x = y by back substitution */ 801 for (k=mbs-1; k>=0; k--){ 802 v = aa + 16*ai[k]; 803 xp = x + k*4; 804 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 805 nz = ai[k+1] - ai[k]; 806 vj = aj + ai[k]; 807 xp = x + (*vj)*4; 808 while (nz--) { 809 /* xk += U(k,:)*x(:) */ 810 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 811 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 812 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 813 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 814 vj++; 815 v += 16; xp = x + (*vj)*4; 816 } 817 xp = x + k*4; 818 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 819 } 820 821 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 822 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 823 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNCT__ 828 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 829 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 830 { 831 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 832 IS isrow=a->row; 833 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 834 PetscErrorCode ierr; 835 PetscInt nz,*vj,k,*r,idx; 836 MatScalar *aa=a->a,*v,*diag; 837 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 838 839 PetscFunctionBegin; 840 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 841 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 842 t = a->solve_work; 843 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 844 845 /* solve U^T * D * y = b by forward substitution */ 846 tp = t; 847 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 848 idx = 3*r[k]; 849 tp[0] = b[idx]; 850 tp[1] = b[idx+1]; 851 tp[2] = b[idx+2]; 852 tp += 3; 853 } 854 855 for (k=0; k<mbs; k++){ 856 v = aa + 9*ai[k]; 857 vj = aj + ai[k]; 858 tp = t + k*3; 859 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 860 nz = ai[k+1] - ai[k]; 861 862 tp = t + (*vj)*3; 863 while (nz--) { 864 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 865 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 866 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 867 vj++; tp = t + (*vj)*3; 868 v += 9; 869 } 870 871 /* xk = inv(Dk)*(Dk*xk) */ 872 diag = aa+k*9; /* ptr to inv(Dk) */ 873 tp = t + k*3; 874 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 875 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 876 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 877 } 878 879 /* solve U*x = y by back substitution */ 880 for (k=mbs-1; k>=0; k--){ 881 v = aa + 9*ai[k]; 882 vj = aj + ai[k]; 883 tp = t + k*3; 884 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 885 nz = ai[k+1] - ai[k]; 886 887 tp = t + (*vj)*3; 888 while (nz--) { 889 /* xk += U(k,:)*x(:) */ 890 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 891 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 892 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 893 vj++; tp = t + (*vj)*3; 894 v += 9; 895 } 896 tp = t + k*3; 897 tp[0] = x0; tp[1] = x1; tp[2] = x2; 898 idx = 3*r[k]; 899 x[idx] = x0; 900 x[idx+1] = x1; 901 x[idx+2] = x2; 902 } 903 904 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 905 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 906 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 907 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 /* 912 Special case where the matrix was factored in the natural ordering. 913 This eliminates the need for the column and row permutation. 914 */ 915 #undef __FUNCT__ 916 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 917 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 918 { 919 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 920 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 921 MatScalar *aa=a->a,*v,*diag; 922 PetscScalar *x,*xp,*b,x0,x1,x2; 923 PetscErrorCode ierr; 924 PetscInt nz,*vj,k; 925 926 PetscFunctionBegin; 927 928 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 929 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 930 931 /* solve U^T * D * y = b by forward substitution */ 932 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 933 for (k=0; k<mbs; k++){ 934 v = aa + 9*ai[k]; 935 xp = x + k*3; 936 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 937 nz = ai[k+1] - ai[k]; 938 vj = aj + ai[k]; 939 xp = x + (*vj)*3; 940 while (nz--) { 941 /* x(:) += U(k,:)^T*(Dk*xk) */ 942 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 943 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 944 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 945 vj++; xp = x + (*vj)*3; 946 v += 9; 947 } 948 /* xk = inv(Dk)*(Dk*xk) */ 949 diag = aa+k*9; /* ptr to inv(Dk) */ 950 xp = x + k*3; 951 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 952 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 953 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 954 } 955 956 /* solve U*x = y by back substitution */ 957 for (k=mbs-1; k>=0; k--){ 958 v = aa + 9*ai[k]; 959 xp = x + k*3; 960 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 961 nz = ai[k+1] - ai[k]; 962 vj = aj + ai[k]; 963 xp = x + (*vj)*3; 964 while (nz--) { 965 /* xk += U(k,:)*x(:) */ 966 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 967 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 968 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 969 vj++; 970 v += 9; xp = x + (*vj)*3; 971 } 972 xp = x + k*3; 973 xp[0] = x0; xp[1] = x1; xp[2] = x2; 974 } 975 976 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 977 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 978 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 979 PetscFunctionReturn(0); 980 } 981 982 #undef __FUNCT__ 983 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 984 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 985 { 986 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 987 IS isrow=a->row; 988 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 989 PetscErrorCode ierr; 990 PetscInt nz,*vj,k,k2,*r,idx; 991 MatScalar *aa=a->a,*v,*diag; 992 PetscScalar *x,*b,x0,x1,*t; 993 994 PetscFunctionBegin; 995 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 996 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 997 t = a->solve_work; 998 /* printf("called MatSolve_SeqSBAIJ_2\n"); */ 999 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1000 1001 /* solve U^T * D * y = perm(b) by forward substitution */ 1002 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1003 idx = 2*r[k]; 1004 t[k*2] = b[idx]; 1005 t[k*2+1] = b[idx+1]; 1006 } 1007 for (k=0; k<mbs; k++){ 1008 v = aa + 4*ai[k]; 1009 vj = aj + ai[k]; 1010 k2 = k*2; 1011 x0 = t[k2]; x1 = t[k2+1]; 1012 nz = ai[k+1] - ai[k]; 1013 while (nz--) { 1014 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1015 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1016 vj++; v += 4; 1017 } 1018 diag = aa+k*4; /* ptr to inv(Dk) */ 1019 t[k2] = diag[0]*x0 + diag[2]*x1; 1020 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1021 } 1022 1023 /* solve U*x = y by back substitution */ 1024 for (k=mbs-1; k>=0; k--){ 1025 v = aa + 4*ai[k]; 1026 vj = aj + ai[k]; 1027 k2 = k*2; 1028 x0 = t[k2]; x1 = t[k2+1]; 1029 nz = ai[k+1] - ai[k]; 1030 while (nz--) { 1031 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1032 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1033 vj++; v += 4; 1034 } 1035 t[k2] = x0; 1036 t[k2+1] = x1; 1037 idx = 2*r[k]; 1038 x[idx] = x0; 1039 x[idx+1] = x1; 1040 } 1041 1042 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1043 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1044 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1045 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); 1046 PetscFunctionReturn(0); 1047 } 1048 1049 /* 1050 Special case where the matrix was factored in the natural ordering. 1051 This eliminates the need for the column and row permutation. 1052 */ 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1055 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1056 { 1057 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1058 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1059 MatScalar *aa=a->a,*v,*diag; 1060 PetscScalar *x,*b,x0,x1; 1061 PetscErrorCode ierr; 1062 PetscInt nz,*vj,k,k2; 1063 1064 PetscFunctionBegin; 1065 1066 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1067 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1068 1069 /* solve U^T * D * y = b by forward substitution */ 1070 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1071 for (k=0; k<mbs; k++){ 1072 v = aa + 4*ai[k]; 1073 vj = aj + ai[k]; 1074 k2 = k*2; 1075 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1076 nz = ai[k+1] - ai[k]; 1077 1078 while (nz--) { 1079 /* x(:) += U(k,:)^T*(Dk*xk) */ 1080 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1081 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1082 vj++; v += 4; 1083 } 1084 /* xk = inv(Dk)*(Dk*xk) */ 1085 diag = aa+k*4; /* ptr to inv(Dk) */ 1086 x[k2] = diag[0]*x0 + diag[2]*x1; 1087 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1088 } 1089 1090 /* solve U*x = y by back substitution */ 1091 for (k=mbs-1; k>=0; k--){ 1092 v = aa + 4*ai[k]; 1093 vj = aj + ai[k]; 1094 k2 = k*2; 1095 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1096 nz = ai[k+1] - ai[k]; 1097 while (nz--) { 1098 /* xk += U(k,:)*x(:) */ 1099 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1100 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1101 vj++; v += 4; 1102 } 1103 x[k2] = x0; 1104 x[k2+1] = x1; 1105 } 1106 1107 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1108 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1109 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */ 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1115 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1116 { 1117 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1118 IS isrow=a->row; 1119 PetscErrorCode ierr; 1120 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rip; 1121 MatScalar *aa=a->a,*v; 1122 PetscScalar *x,*b,xk,*t; 1123 PetscInt nz,*vj,k; 1124 1125 PetscFunctionBegin; 1126 if (!mbs) PetscFunctionReturn(0); 1127 /* printf(" MatSolve_SeqSBAIJ_1 is called\n"); */ 1128 1129 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1130 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1131 t = a->solve_work; 1132 1133 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1134 1135 /* solve U^T*D*y = perm(b) by forward substitution */ 1136 for (k=0; k<mbs; k++) t[k] = b[rip[k]]; 1137 for (k=0; k<mbs; k++){ 1138 v = aa + ai[k] + 1; 1139 vj = aj + ai[k] + 1; 1140 xk = t[k]; 1141 nz = ai[k+1] - ai[k] - 1; 1142 while (nz--) t[*vj++] += (*v++) * xk; 1143 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1144 } 1145 1146 /* solve U*x = y by back substitution */ 1147 for (k=mbs-1; k>=0; k--){ 1148 v = aa + ai[k] + 1; 1149 vj = aj + ai[k] + 1; 1150 xk = t[k]; 1151 nz = ai[k+1] - ai[k] - 1; 1152 while (nz--) xk += (*v++) * t[*vj++]; 1153 t[k] = xk; 1154 x[rip[k]] = xk; 1155 } 1156 1157 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1158 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1159 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1160 ierr = PetscLogFlops(4*a->nz + A->m);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1166 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1167 { 1168 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1169 PetscErrorCode ierr; 1170 1171 PetscFunctionBegin; 1172 if (A->bs == 1) { 1173 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1174 } else { 1175 IS isrow=a->row; 1176 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i; 1177 MatScalar *aa=a->a,*v; 1178 PetscScalar *x,*b,*t; 1179 PetscInt nz,*vj,k,n; 1180 if (bb->n > a->solves_work_n) { 1181 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 1182 ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1183 a->solves_work_n = bb->n; 1184 } 1185 n = bb->n; 1186 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1187 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1188 t = a->solves_work; 1189 1190 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1191 1192 /* solve U^T*D*y = perm(b) by forward substitution */ 1193 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */ 1194 for (k=0; k<mbs; k++){ 1195 v = aa + ai[k]; 1196 vj = aj + ai[k]; 1197 nz = ai[k+1] - ai[k]; 1198 while (nz--) { 1199 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1200 v++;vj++; 1201 } 1202 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1203 } 1204 1205 /* solve U*x = y by back substitution */ 1206 for (k=mbs-1; k>=0; k--){ 1207 v = aa + ai[k]; 1208 vj = aj + ai[k]; 1209 nz = ai[k+1] - ai[k]; 1210 while (nz--) { 1211 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1212 v++;vj++; 1213 } 1214 for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i]; 1215 } 1216 1217 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1218 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1219 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1220 ierr = PetscLogFlops(bb->n*(4*a->nz + A->m));CHKERRQ(ierr); 1221 } 1222 PetscFunctionReturn(0); 1223 } 1224 1225 /* 1226 Special case where the matrix was ILU(0) factored in the natural 1227 ordering. This eliminates the need for the column and row permutation. 1228 */ 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1231 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1232 { 1233 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1234 PetscErrorCode ierr; 1235 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1236 MatScalar *aa=a->a,*v; 1237 PetscScalar *x,*b,xk; 1238 PetscInt nz,*vj,k; 1239 1240 PetscFunctionBegin; 1241 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1242 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1243 1244 /* solve U^T*D*y = b by forward substitution */ 1245 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1246 for (k=0; k<mbs; k++){ 1247 v = aa + ai[k] + 1; 1248 vj = aj + ai[k] + 1; 1249 xk = x[k]; 1250 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1251 while (nz--) x[*vj++] += (*v++) * xk; 1252 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1253 } 1254 1255 /* solve U*x = y by back substitution */ 1256 for (k=mbs-2; k>=0; k--){ 1257 v = aa + ai[k] + 1; 1258 vj = aj + ai[k] + 1; 1259 xk = x[k]; 1260 nz = ai[k+1] - ai[k] - 1; 1261 while (nz--) xk += (*v++) * x[*vj++]; 1262 x[k] = xk; 1263 } 1264 1265 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1266 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1267 ierr = PetscLogFlops(4*a->nz + A->m);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1272 #undef __FUNCT__ 1273 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 1274 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B) 1275 { 1276 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1277 PetscErrorCode ierr; 1278 PetscInt *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j; 1279 PetscInt *jutmp,bs = A->bs,bs2=a->bs2; 1280 PetscInt m,reallocs = 0,*levtmp; 1281 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 1282 PetscInt incrlev,*lev,shift,prow,nz; 1283 PetscReal f = info->fill,levels = info->levels; 1284 PetscTruth perm_identity; 1285 1286 PetscFunctionBegin; 1287 /* check whether perm is the identity mapping */ 1288 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1289 1290 if (perm_identity){ 1291 a->permute = PETSC_FALSE; 1292 ai = a->i; aj = a->j; 1293 } else { /* non-trivial permutation */ 1294 a->permute = PETSC_TRUE; 1295 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1296 ai = a->inew; aj = a->jnew; 1297 } 1298 1299 /* initialization */ 1300 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1301 umax = (PetscInt)(f*ai[mbs] + 1); 1302 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 1303 umax += mbs + 1; 1304 shift = mbs + 1; 1305 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 1306 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 1307 iu[0] = mbs + 1; 1308 juidx = mbs + 1; 1309 /* prowl: linked list for pivot row */ 1310 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr); 1311 /* q: linked list for col index */ 1312 q = prowl + mbs; 1313 levtmp = q + mbs; 1314 1315 for (i=0; i<mbs; i++){ 1316 prowl[i] = mbs; 1317 q[i] = 0; 1318 } 1319 1320 /* for each row k */ 1321 for (k=0; k<mbs; k++){ 1322 nzk = 0; 1323 q[k] = mbs; 1324 /* copy current row into linked list */ 1325 nz = ai[rip[k]+1] - ai[rip[k]]; 1326 j = ai[rip[k]]; 1327 while (nz--){ 1328 vj = rip[aj[j++]]; 1329 if (vj > k){ 1330 qm = k; 1331 do { 1332 m = qm; qm = q[m]; 1333 } while(qm < vj); 1334 if (qm == vj) { 1335 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 1336 } 1337 nzk++; 1338 q[m] = vj; 1339 q[vj] = qm; 1340 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1341 } 1342 } 1343 1344 /* modify nonzero structure of k-th row by computing fill-in 1345 for each row prow to be merged in */ 1346 prow = k; 1347 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1348 1349 while (prow < k){ 1350 /* merge row prow into k-th row */ 1351 jmin = iu[prow] + 1; 1352 jmax = iu[prow+1]; 1353 qm = k; 1354 for (j=jmin; j<jmax; j++){ 1355 incrlev = lev[j-shift] + 1; 1356 if (incrlev > levels) continue; 1357 vj = ju[j]; 1358 do { 1359 m = qm; qm = q[m]; 1360 } while (qm < vj); 1361 if (qm != vj){ /* a fill */ 1362 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1363 levtmp[vj] = incrlev; 1364 } else { 1365 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1366 } 1367 } 1368 prow = prowl[prow]; /* next pivot row */ 1369 } 1370 1371 /* add k to row list for first nonzero element in k-th row */ 1372 if (nzk > 1){ 1373 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1374 prowl[k] = prowl[i]; prowl[i] = k; 1375 } 1376 iu[k+1] = iu[k] + nzk; 1377 1378 /* allocate more space to ju and lev if needed */ 1379 if (iu[k+1] > umax) { 1380 /* estimate how much additional space we will need */ 1381 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1382 /* just double the memory each time */ 1383 maxadd = umax; 1384 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1385 umax += maxadd; 1386 1387 /* allocate a longer ju */ 1388 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1389 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 1390 ierr = PetscFree(ju);CHKERRQ(ierr); 1391 ju = jutmp; 1392 1393 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1394 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 1395 ierr = PetscFree(lev);CHKERRQ(ierr); 1396 lev = jutmp; 1397 reallocs += 2; /* count how many times we realloc */ 1398 } 1399 1400 /* save nonzero structure of k-th row in ju */ 1401 i=k; 1402 while (nzk --) { 1403 i = q[i]; 1404 ju[juidx] = i; 1405 lev[juidx-shift] = levtmp[i]; 1406 juidx++; 1407 } 1408 } 1409 1410 #if defined(PETSC_USE_VERBOSE) 1411 if (ai[mbs] != 0) { 1412 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1413 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 1414 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af));CHKERRQ(ierr); 1415 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af));CHKERRQ(ierr); 1416 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n"));CHKERRQ(ierr); 1417 } else { 1418 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1419 } 1420 #endif 1421 1422 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1423 ierr = PetscFree(prowl);CHKERRQ(ierr); 1424 ierr = PetscFree(lev);CHKERRQ(ierr); 1425 1426 /* put together the new matrix */ 1427 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 1428 ierr = MatSetSizes(*B,bs*mbs,bs*mbs,bs*mbs,bs*mbs);CHKERRQ(ierr); 1429 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 1430 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1431 1432 /* ierr = PetscLogObjectParent(*B,iperm);CHKERRQ(ierr); */ 1433 b = (Mat_SeqSBAIJ*)(*B)->data; 1434 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 1435 b->singlemalloc = PETSC_FALSE; 1436 /* the next line frees the default space generated by the Create() */ 1437 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 1438 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1439 b->j = ju; 1440 b->i = iu; 1441 b->diag = 0; 1442 b->ilen = 0; 1443 b->imax = 0; 1444 1445 if (b->row) { 1446 ierr = ISDestroy(b->row);CHKERRQ(ierr); 1447 } 1448 if (b->icol) { 1449 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 1450 } 1451 b->row = perm; 1452 b->icol = perm; 1453 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1454 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1455 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1456 /* In b structure: Free imax, ilen, old a, old j. 1457 Allocate idnew, solve_work, new a, new j */ 1458 ierr = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1459 b->maxnz = b->nz = iu[mbs]; 1460 1461 (*B)->factor = FACTOR_CHOLESKY; 1462 (*B)->info.factor_mallocs = reallocs; 1463 (*B)->info.fill_ratio_given = f; 1464 if (ai[mbs] != 0) { 1465 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1466 } else { 1467 (*B)->info.fill_ratio_needed = 0.0; 1468 } 1469 1470 if (perm_identity){ 1471 switch (bs) { 1472 case 1: 1473 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1474 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1475 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1476 (*B)->ops->solves = MatSolves_SeqSBAIJ_1; 1477 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n"));CHKERRQ(ierr); 1478 break; 1479 case 2: 1480 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1481 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1482 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1483 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 1484 break; 1485 case 3: 1486 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1487 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1488 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1489 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 1490 break; 1491 case 4: 1492 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1493 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1494 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1495 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 1496 break; 1497 case 5: 1498 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1499 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1500 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1501 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 1502 break; 1503 case 6: 1504 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1505 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1506 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1507 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 1508 break; 1509 case 7: 1510 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1511 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1512 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1513 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 1514 break; 1515 default: 1516 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 1517 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1518 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1519 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"));CHKERRQ(ierr); 1520 break; 1521 } 1522 } 1523 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #include "petscbt.h" 1528 #include "src/mat/utils/freespace.h" 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 1531 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1532 { 1533 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1534 Mat_SeqSBAIJ *b; 1535 Mat B; 1536 PetscErrorCode ierr; 1537 PetscTruth perm_identity; 1538 PetscInt bs=A->bs,am=a->mbs; 1539 PetscInt reallocs=0,*rip,i,*ai,*aj,*ui; 1540 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1541 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1542 PetscReal fill=info->fill,levels=info->levels; 1543 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1544 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1545 PetscBT lnkbt; 1546 1547 PetscFunctionBegin; 1548 /* 1549 This code originally uses Modified Sparse Row (MSR) storage 1550 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise! 1551 Then it is rewritten so the factor B takes seqsbaij format. However the associated 1552 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 1553 thus the original code in MSR format is still used for these cases. 1554 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 1555 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 1556 */ 1557 if (bs > 1){ 1558 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 /* check whether perm is the identity mapping */ 1563 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1564 1565 /* special case that simply copies fill pattern */ 1566 if (!levels && perm_identity) { 1567 a->permute = PETSC_FALSE; 1568 ai = a->i; aj = a->j; 1569 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1570 ierr = PetscMemcpy(ui,ai,(am+1)*sizeof(PetscInt));CHKERRQ(ierr); 1571 ierr = PetscMalloc(ai[am]*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1572 ierr = PetscMemcpy(uj,aj,ai[am]*sizeof(PetscInt));CHKERRQ(ierr); 1573 1574 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1575 if (perm_identity){ 1576 a->permute = PETSC_FALSE; 1577 ai = a->i; aj = a->j; 1578 } else { /* non-trivial permutation */ 1579 a->permute = PETSC_TRUE; 1580 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1581 ai = a->inew; aj = a->jnew; 1582 } 1583 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1584 1585 /* initialization */ 1586 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1587 ui[0] = 0; 1588 1589 /* jl: linked list for storing indices of the pivot rows 1590 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1591 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1592 il = jl + am; 1593 uj_ptr = (PetscInt**)(il + am); 1594 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1595 for (i=0; i<am; i++){ 1596 jl[i] = am; il[i] = 0; 1597 } 1598 1599 /* create and initialize a linked list for storing column indices of the active row k */ 1600 nlnk = am + 1; 1601 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1602 1603 /* initial FreeSpace size is fill*(ai[am]+1) */ 1604 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1605 current_space = free_space; 1606 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1607 current_space_lvl = free_space_lvl; 1608 1609 for (k=0; k<am; k++){ /* for each active row k */ 1610 /* initialize lnk by the column indices of row rip[k] */ 1611 nzk = 0; 1612 ncols = ai[rip[k]+1] - ai[rip[k]]; 1613 cols = aj+ai[rip[k]]; 1614 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1615 nzk += nlnk; 1616 1617 /* update lnk by computing fill-in for each pivot row to be merged in */ 1618 prow = jl[k]; /* 1st pivot row */ 1619 1620 while (prow < k){ 1621 nextprow = jl[prow]; 1622 1623 /* merge prow into k-th row */ 1624 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1625 jmax = ui[prow+1]; 1626 ncols = jmax-jmin; 1627 i = jmin - ui[prow]; 1628 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1629 j = *(uj_lvl_ptr[prow] + i - 1); 1630 cols_lvl = uj_lvl_ptr[prow]+i; 1631 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1632 nzk += nlnk; 1633 1634 /* update il and jl for prow */ 1635 if (jmin < jmax){ 1636 il[prow] = jmin; 1637 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1638 } 1639 prow = nextprow; 1640 } 1641 1642 /* if free space is not available, make more free space */ 1643 if (current_space->local_remaining<nzk) { 1644 i = am - k + 1; /* num of unfactored rows */ 1645 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1646 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1647 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1648 reallocs++; 1649 } 1650 1651 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1652 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1653 1654 /* add the k-th row into il and jl */ 1655 if (nzk-1 > 0){ 1656 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1657 jl[k] = jl[i]; jl[i] = k; 1658 il[k] = ui[k] + 1; 1659 } 1660 uj_ptr[k] = current_space->array; 1661 uj_lvl_ptr[k] = current_space_lvl->array; 1662 1663 current_space->array += nzk; 1664 current_space->local_used += nzk; 1665 current_space->local_remaining -= nzk; 1666 current_space_lvl->array += nzk; 1667 current_space_lvl->local_used += nzk; 1668 current_space_lvl->local_remaining -= nzk; 1669 1670 ui[k+1] = ui[k] + nzk; 1671 } 1672 1673 #if defined(PETSC_USE_VERBOSE) 1674 if (ai[am] != 0) { 1675 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1676 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1677 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1678 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1679 } else { 1680 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1681 } 1682 #endif 1683 1684 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1685 ierr = PetscFree(jl);CHKERRQ(ierr); 1686 1687 /* destroy list of free space and other temporary array(s) */ 1688 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1689 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1690 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1691 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1692 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1693 1694 /* put together the new matrix in MATSEQSBAIJ format */ 1695 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1696 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1697 B = *fact; 1698 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1699 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1700 1701 b = (Mat_SeqSBAIJ*)B->data; 1702 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 1703 b->singlemalloc = PETSC_FALSE; 1704 /* the next line frees the default space generated by the Create() */ 1705 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 1706 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1707 b->j = uj; 1708 b->i = ui; 1709 b->diag = 0; 1710 b->ilen = 0; 1711 b->imax = 0; 1712 b->row = perm; 1713 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1714 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1715 b->icol = perm; 1716 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1717 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1718 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1719 b->maxnz = b->nz = ui[am]; 1720 1721 B->factor = FACTOR_CHOLESKY; 1722 B->info.factor_mallocs = reallocs; 1723 B->info.fill_ratio_given = fill; 1724 if (ai[am] != 0) { 1725 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1726 } else { 1727 B->info.fill_ratio_needed = 0.0; 1728 } 1729 1730 if (perm_identity){ 1731 switch (bs) { 1732 case 1: 1733 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1734 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1735 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1736 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n"));CHKERRQ(ierr); 1737 break; 1738 case 2: 1739 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1740 B->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1741 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1742 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 1743 break; 1744 case 3: 1745 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1746 B->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1747 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1748 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 1749 break; 1750 case 4: 1751 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1752 B->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1753 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1754 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 1755 break; 1756 case 5: 1757 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1758 B->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1759 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1760 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 1761 break; 1762 case 6: 1763 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1764 B->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1765 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1766 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 1767 break; 1768 case 7: 1769 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1770 B->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1771 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1772 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 1773 break; 1774 default: 1775 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 1776 B->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1777 B->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1778 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"));CHKERRQ(ierr); 1779 break; 1780 } 1781 } 1782 PetscFunctionReturn(0); 1783 } 1784 1785