1 2 /* 3 Factorization code for SBAIJ format. 4 */ 5 6 #include <../src/mat/impls/sbaij/seq/sbaij.h> 7 #include <../src/mat/impls/baij/seq/baij.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_inplace" 12 PetscErrorCode MatSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx) 13 { 14 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 15 IS isrow=a->row; 16 PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j; 17 PetscErrorCode ierr; 18 const PetscInt *r; 19 PetscInt nz,*vj,k,idx,k1; 20 PetscInt bs =A->rmap->bs,bs2 = a->bs2; 21 const MatScalar *aa=a->a,*v,*diag; 22 PetscScalar *x,*xk,*xj,*xk_tmp,*t; 23 const PetscScalar *b; 24 25 PetscFunctionBegin; 26 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 27 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28 t = a->solve_work; 29 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 30 ierr = PetscMalloc1(bs,&xk_tmp);CHKERRQ(ierr); 31 32 /* solve U^T * D * y = b by forward substitution */ 33 xk = t; 34 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 35 idx = bs*r[k]; 36 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 37 } 38 for (k=0; k<mbs; k++) { 39 v = aa + bs2*ai[k]; 40 xk = t + k*bs; /* Dk*xk = k-th block of x */ 41 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 42 nz = ai[k+1] - ai[k]; 43 vj = aj + ai[k]; 44 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 45 while (nz--) { 46 /* x(:) += U(k,:)^T*(Dk*xk) */ 47 PetscKernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 48 vj++; xj = t + (*vj)*bs; 49 v += bs2; 50 } 51 /* xk = inv(Dk)*(Dk*xk) */ 52 diag = aa+k*bs2; /* ptr to inv(Dk) */ 53 PetscKernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 54 } 55 56 /* solve U*x = y by back substitution */ 57 for (k=mbs-1; k>=0; k--) { 58 v = aa + bs2*ai[k]; 59 xk = t + k*bs; /* xk */ 60 nz = ai[k+1] - ai[k]; 61 vj = aj + ai[k]; 62 xj = t + (*vj)*bs; 63 while (nz--) { 64 /* xk += U(k,:)*x(:) */ 65 PetscKernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 66 vj++; 67 v += bs2; xj = t + (*vj)*bs; 68 } 69 idx = bs*r[k]; 70 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 71 } 72 73 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 74 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 75 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 76 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 77 ierr = PetscLogFlops(4.0*bs2*a->nz -(bs+2.0*bs2)*mbs);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_inplace" 83 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx) 84 { 85 PetscFunctionBegin; 86 SETERRQ(PETSC_COMM_SELF,1,"not implemented yet"); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_inplace" 92 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx) 93 { 94 PetscFunctionBegin; 95 SETERRQ(PETSC_COMM_SELF,1,"not implemented yet"); 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering" 101 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 102 { 103 PetscErrorCode ierr; 104 PetscInt nz,k; 105 const PetscInt *vj,bs2 = bs*bs; 106 const MatScalar *v,*diag; 107 PetscScalar *xk,*xj,*xk_tmp; 108 109 PetscFunctionBegin; 110 ierr = PetscMalloc1(bs,&xk_tmp);CHKERRQ(ierr); 111 for (k=0; k<mbs; k++) { 112 v = aa + bs2*ai[k]; 113 xk = x + k*bs; /* Dk*xk = k-th block of x */ 114 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 115 nz = ai[k+1] - ai[k]; 116 vj = aj + ai[k]; 117 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 118 while (nz--) { 119 /* x(:) += U(k,:)^T*(Dk*xk) */ 120 PetscKernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 121 vj++; xj = x + (*vj)*bs; 122 v += bs2; 123 } 124 /* xk = inv(Dk)*(Dk*xk) */ 125 diag = aa+k*bs2; /* ptr to inv(Dk) */ 126 PetscKernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 127 } 128 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering" 134 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 135 { 136 PetscInt nz,k; 137 const PetscInt *vj,bs2 = bs*bs; 138 const MatScalar *v; 139 PetscScalar *xk,*xj; 140 141 PetscFunctionBegin; 142 for (k=mbs-1; k>=0; k--) { 143 v = aa + bs2*ai[k]; 144 xk = x + k*bs; /* xk */ 145 nz = ai[k+1] - ai[k]; 146 vj = aj + ai[k]; 147 xj = x + (*vj)*bs; 148 while (nz--) { 149 /* xk += U(k,:)*x(:) */ 150 PetscKernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 151 vj++; 152 v += bs2; xj = x + (*vj)*bs; 153 } 154 } 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace" 160 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 161 { 162 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 163 PetscErrorCode ierr; 164 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 165 PetscInt bs =A->rmap->bs; 166 const MatScalar *aa=a->a; 167 PetscScalar *x; 168 const PetscScalar *b; 169 170 PetscFunctionBegin; 171 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 172 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 173 174 /* solve U^T * D * y = b by forward substitution */ 175 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 176 ierr = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 177 178 /* solve U*x = y by back substitution */ 179 ierr = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 180 181 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 182 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 183 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 184 PetscFunctionReturn(0); 185 } 186 187 #undef __FUNCT__ 188 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace" 189 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 190 { 191 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 192 PetscErrorCode ierr; 193 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 194 PetscInt bs =A->rmap->bs; 195 const MatScalar *aa=a->a; 196 const PetscScalar *b; 197 PetscScalar *x; 198 199 PetscFunctionBegin; 200 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 201 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 202 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 203 ierr = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 204 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 205 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 206 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 207 PetscFunctionReturn(0); 208 } 209 210 #undef __FUNCT__ 211 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace" 212 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 213 { 214 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 215 PetscErrorCode ierr; 216 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 217 PetscInt bs =A->rmap->bs; 218 const MatScalar *aa=a->a; 219 const PetscScalar *b; 220 PetscScalar *x; 221 222 PetscFunctionBegin; 223 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 224 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 225 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 226 ierr = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 227 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 228 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 229 ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); 230 PetscFunctionReturn(0); 231 } 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_inplace" 235 PetscErrorCode MatSolve_SeqSBAIJ_7_inplace(Mat A,Vec bb,Vec xx) 236 { 237 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 238 IS isrow=a->row; 239 const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2,bs=A->rmap->bs,*r,*vj; 240 PetscErrorCode ierr; 241 PetscInt nz,k,idx; 242 const MatScalar *aa=a->a,*v,*d; 243 PetscScalar *x,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 244 const PetscScalar *b; 245 246 PetscFunctionBegin; 247 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 248 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 249 t = a->solve_work; 250 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 251 252 /* solve U^T * D * y = b by forward substitution */ 253 tp = t; 254 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 255 idx = 7*r[k]; 256 tp[0] = b[idx]; 257 tp[1] = b[idx+1]; 258 tp[2] = b[idx+2]; 259 tp[3] = b[idx+3]; 260 tp[4] = b[idx+4]; 261 tp[5] = b[idx+5]; 262 tp[6] = b[idx+6]; 263 tp += 7; 264 } 265 266 for (k=0; k<mbs; k++) { 267 v = aa + 49*ai[k]; 268 vj = aj + ai[k]; 269 tp = t + k*7; 270 x0 =tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 271 nz = ai[k+1] - ai[k]; 272 tp = t + (*vj)*7; 273 while (nz--) { 274 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 275 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 276 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 277 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 278 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 279 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 280 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 281 vj++; 282 tp = t + (*vj)*7; 283 v += 49; 284 } 285 286 /* xk = inv(Dk)*(Dk*xk) */ 287 d = aa+k*49; /* ptr to inv(Dk) */ 288 tp = t + k*7; 289 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 290 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 291 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 292 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 293 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 294 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 295 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 296 } 297 298 /* solve U*x = y by back substitution */ 299 for (k=mbs-1; k>=0; k--) { 300 v = aa + 49*ai[k]; 301 vj = aj + ai[k]; 302 tp = t + k*7; 303 x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 304 nz = ai[k+1] - ai[k]; 305 306 tp = t + (*vj)*7; 307 while (nz--) { 308 /* xk += U(k,:)*x(:) */ 309 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 310 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 311 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 312 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 313 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 314 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 315 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 316 vj++; 317 tp = t + (*vj)*7; 318 v += 49; 319 } 320 tp = t + k*7; 321 tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 322 idx = 7*r[k]; 323 x[idx] = x0; 324 x[idx+1] = x1; 325 x[idx+2] = x2; 326 x[idx+3] = x3; 327 x[idx+4] = x4; 328 x[idx+5] = x5; 329 x[idx+6] = x6; 330 } 331 332 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 333 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 334 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 335 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering" 341 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) 342 { 343 const MatScalar *v,*d; 344 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 345 PetscInt nz,k; 346 const PetscInt *vj; 347 348 PetscFunctionBegin; 349 for (k=0; k<mbs; k++) { 350 v = aa + 49*ai[k]; 351 xp = x + k*7; 352 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 353 nz = ai[k+1] - ai[k]; 354 vj = aj + ai[k]; 355 xp = x + (*vj)*7; 356 PetscPrefetchBlock(vj+nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 357 PetscPrefetchBlock(v+49*nz,49*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 358 while (nz--) { 359 /* x(:) += U(k,:)^T*(Dk*xk) */ 360 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 361 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 362 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 363 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 364 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 365 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 366 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 367 vj++; 368 xp = x + (*vj)*7; 369 v += 49; 370 } 371 /* xk = inv(Dk)*(Dk*xk) */ 372 d = aa+k*49; /* ptr to inv(Dk) */ 373 xp = x + k*7; 374 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 375 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 376 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 377 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 378 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 379 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 380 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 381 } 382 PetscFunctionReturn(0); 383 } 384 385 #undef __FUNCT__ 386 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering" 387 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) 388 { 389 const MatScalar *v; 390 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 391 PetscInt nz,k; 392 const PetscInt *vj; 393 394 PetscFunctionBegin; 395 for (k=mbs-1; k>=0; k--) { 396 v = aa + 49*ai[k]; 397 xp = x + k*7; 398 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 399 nz = ai[k+1] - ai[k]; 400 vj = aj + ai[k]; 401 xp = x + (*vj)*7; 402 PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 403 PetscPrefetchBlock(v-49*nz,49*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 404 while (nz--) { 405 /* xk += U(k,:)*x(:) */ 406 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 407 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 408 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 409 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 410 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 411 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 412 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 413 vj++; 414 v += 49; xp = x + (*vj)*7; 415 } 416 xp = x + k*7; 417 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 418 } 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace" 424 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 425 { 426 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 427 PetscErrorCode ierr; 428 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 429 const MatScalar *aa=a->a; 430 PetscScalar *x; 431 const PetscScalar *b; 432 433 PetscFunctionBegin; 434 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 435 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 436 437 /* solve U^T * D * y = b by forward substitution */ 438 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 439 ierr = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 440 441 /* solve U*x = y by back substitution */ 442 ierr = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 443 444 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 445 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 446 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace" 452 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 453 { 454 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 455 PetscErrorCode ierr; 456 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 457 const MatScalar *aa=a->a; 458 PetscScalar *x; 459 const PetscScalar *b; 460 461 PetscFunctionBegin; 462 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 463 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 464 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 465 ierr = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 466 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 467 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 468 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace" 474 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 475 { 476 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 477 PetscErrorCode ierr; 478 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 479 const MatScalar *aa=a->a; 480 PetscScalar *x; 481 const PetscScalar *b; 482 483 PetscFunctionBegin; 484 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 485 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 486 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 487 ierr = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 488 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 489 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 490 ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); 491 PetscFunctionReturn(0); 492 } 493 494 #undef __FUNCT__ 495 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_inplace" 496 PetscErrorCode MatSolve_SeqSBAIJ_6_inplace(Mat A,Vec bb,Vec xx) 497 { 498 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 499 IS isrow=a->row; 500 const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2,*r,*vj; 501 PetscErrorCode ierr; 502 PetscInt nz,k,idx; 503 const MatScalar *aa=a->a,*v,*d; 504 PetscScalar *x,x0,x1,x2,x3,x4,x5,*t,*tp; 505 const PetscScalar *b; 506 507 PetscFunctionBegin; 508 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 509 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 510 t = a->solve_work; 511 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 512 513 /* solve U^T * D * y = b by forward substitution */ 514 tp = t; 515 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 516 idx = 6*r[k]; 517 tp[0] = b[idx]; 518 tp[1] = b[idx+1]; 519 tp[2] = b[idx+2]; 520 tp[3] = b[idx+3]; 521 tp[4] = b[idx+4]; 522 tp[5] = b[idx+5]; 523 tp += 6; 524 } 525 526 for (k=0; k<mbs; k++) { 527 v = aa + 36*ai[k]; 528 vj = aj + ai[k]; 529 tp = t + k*6; 530 x0 =tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 531 nz = ai[k+1] - ai[k]; 532 tp = t + (*vj)*6; 533 while (nz--) { 534 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 535 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 536 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 537 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 538 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 539 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 540 vj++; 541 tp = t + (*vj)*6; 542 v += 36; 543 } 544 545 /* xk = inv(Dk)*(Dk*xk) */ 546 d = aa+k*36; /* ptr to inv(Dk) */ 547 tp = t + k*6; 548 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 549 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 550 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 551 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 552 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 553 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 554 } 555 556 /* solve U*x = y by back substitution */ 557 for (k=mbs-1; k>=0; k--) { 558 v = aa + 36*ai[k]; 559 vj = aj + ai[k]; 560 tp = t + k*6; 561 x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 562 nz = ai[k+1] - ai[k]; 563 564 tp = t + (*vj)*6; 565 while (nz--) { 566 /* xk += U(k,:)*x(:) */ 567 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 568 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 569 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 570 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 571 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 572 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 573 vj++; 574 tp = t + (*vj)*6; 575 v += 36; 576 } 577 tp = t + k*6; 578 tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 579 idx = 6*r[k]; 580 x[idx] = x0; 581 x[idx+1] = x1; 582 x[idx+2] = x2; 583 x[idx+3] = x3; 584 x[idx+4] = x4; 585 x[idx+5] = x5; 586 } 587 588 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 589 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 590 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 591 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 592 PetscFunctionReturn(0); 593 } 594 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering" 597 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) 598 { 599 const MatScalar *v,*d; 600 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 601 PetscInt nz,k; 602 const PetscInt *vj; 603 604 PetscFunctionBegin; 605 for (k=0; k<mbs; k++) { 606 v = aa + 36*ai[k]; 607 xp = x + k*6; 608 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 609 nz = ai[k+1] - ai[k]; 610 vj = aj + ai[k]; 611 xp = x + (*vj)*6; 612 PetscPrefetchBlock(vj+nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 613 PetscPrefetchBlock(v+36*nz,36*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 614 while (nz--) { 615 /* x(:) += U(k,:)^T*(Dk*xk) */ 616 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 617 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 618 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 619 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 620 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 621 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 622 vj++; 623 xp = x + (*vj)*6; 624 v += 36; 625 } 626 /* xk = inv(Dk)*(Dk*xk) */ 627 d = aa+k*36; /* ptr to inv(Dk) */ 628 xp = x + k*6; 629 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 630 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 631 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 632 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 633 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 634 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 635 } 636 PetscFunctionReturn(0); 637 } 638 #undef __FUNCT__ 639 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering" 640 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) 641 { 642 const MatScalar *v; 643 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 644 PetscInt nz,k; 645 const PetscInt *vj; 646 647 PetscFunctionBegin; 648 for (k=mbs-1; k>=0; k--) { 649 v = aa + 36*ai[k]; 650 xp = x + k*6; 651 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 652 nz = ai[k+1] - ai[k]; 653 vj = aj + ai[k]; 654 xp = x + (*vj)*6; 655 PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 656 PetscPrefetchBlock(v-36*nz,36*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 657 while (nz--) { 658 /* xk += U(k,:)*x(:) */ 659 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 660 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 661 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 662 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 663 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 664 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 665 vj++; 666 v += 36; xp = x + (*vj)*6; 667 } 668 xp = x + k*6; 669 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 670 } 671 PetscFunctionReturn(0); 672 } 673 674 675 #undef __FUNCT__ 676 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace" 677 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 678 { 679 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 680 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 681 const MatScalar *aa=a->a; 682 PetscScalar *x; 683 const PetscScalar *b; 684 PetscErrorCode ierr; 685 686 PetscFunctionBegin; 687 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 688 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 689 690 /* solve U^T * D * y = b by forward substitution */ 691 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 692 ierr = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 693 694 /* solve U*x = y by back substitution */ 695 ierr = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 696 697 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 698 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 699 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace" 705 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 706 { 707 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 708 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 709 const MatScalar *aa=a->a; 710 PetscScalar *x; 711 const PetscScalar *b; 712 PetscErrorCode ierr; 713 714 PetscFunctionBegin; 715 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 716 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 717 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 718 ierr = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 719 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 720 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 721 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 722 PetscFunctionReturn(0); 723 } 724 725 #undef __FUNCT__ 726 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace" 727 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 728 { 729 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 730 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 731 const MatScalar *aa=a->a; 732 PetscScalar *x; 733 const PetscScalar *b; 734 PetscErrorCode ierr; 735 736 PetscFunctionBegin; 737 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 738 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 739 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 740 ierr = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 741 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 742 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 743 ierr = PetscLogFlops(2.0*bs2*(a->nz - mbs));CHKERRQ(ierr); 744 PetscFunctionReturn(0); 745 } 746 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_inplace" 749 PetscErrorCode MatSolve_SeqSBAIJ_5_inplace(Mat A,Vec bb,Vec xx) 750 { 751 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 752 IS isrow=a->row; 753 const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2 = a->bs2; 754 PetscErrorCode ierr; 755 const PetscInt *r,*vj; 756 PetscInt nz,k,idx; 757 const MatScalar *aa=a->a,*v,*diag; 758 PetscScalar *x,x0,x1,x2,x3,x4,*t,*tp; 759 const PetscScalar *b; 760 761 PetscFunctionBegin; 762 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 763 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 764 t = a->solve_work; 765 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 766 767 /* solve U^T * D * y = b by forward substitution */ 768 tp = t; 769 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 770 idx = 5*r[k]; 771 tp[0] = b[idx]; 772 tp[1] = b[idx+1]; 773 tp[2] = b[idx+2]; 774 tp[3] = b[idx+3]; 775 tp[4] = b[idx+4]; 776 tp += 5; 777 } 778 779 for (k=0; k<mbs; k++) { 780 v = aa + 25*ai[k]; 781 vj = aj + ai[k]; 782 tp = t + k*5; 783 x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 784 nz = ai[k+1] - ai[k]; 785 786 tp = t + (*vj)*5; 787 while (nz--) { 788 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 789 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 790 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 791 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 792 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 793 vj++; 794 tp = t + (*vj)*5; 795 v += 25; 796 } 797 798 /* xk = inv(Dk)*(Dk*xk) */ 799 diag = aa+k*25; /* ptr to inv(Dk) */ 800 tp = t + k*5; 801 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 802 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 803 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 804 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 805 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 806 } 807 808 /* solve U*x = y by back substitution */ 809 for (k=mbs-1; k>=0; k--) { 810 v = aa + 25*ai[k]; 811 vj = aj + ai[k]; 812 tp = t + k*5; 813 x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; /* xk */ 814 nz = ai[k+1] - ai[k]; 815 816 tp = t + (*vj)*5; 817 while (nz--) { 818 /* xk += U(k,:)*x(:) */ 819 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 820 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 821 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 822 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 823 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 824 vj++; 825 tp = t + (*vj)*5; 826 v += 25; 827 } 828 tp = t + k*5; 829 tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 830 idx = 5*r[k]; 831 x[idx] = x0; 832 x[idx+1] = x1; 833 x[idx+2] = x2; 834 x[idx+3] = x3; 835 x[idx+4] = x4; 836 } 837 838 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 839 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 840 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 841 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 842 PetscFunctionReturn(0); 843 } 844 845 #undef __FUNCT__ 846 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering" 847 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) 848 { 849 const MatScalar *v,*diag; 850 PetscScalar *xp,x0,x1,x2,x3,x4; 851 PetscInt nz,k; 852 const PetscInt *vj; 853 854 PetscFunctionBegin; 855 for (k=0; k<mbs; k++) { 856 v = aa + 25*ai[k]; 857 xp = x + k*5; 858 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 859 nz = ai[k+1] - ai[k]; 860 vj = aj + ai[k]; 861 xp = x + (*vj)*5; 862 PetscPrefetchBlock(vj+nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 863 PetscPrefetchBlock(v+25*nz,25*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 864 while (nz--) { 865 /* x(:) += U(k,:)^T*(Dk*xk) */ 866 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 867 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 868 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 869 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 870 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 871 vj++; 872 xp = x + (*vj)*5; 873 v += 25; 874 } 875 /* xk = inv(Dk)*(Dk*xk) */ 876 diag = aa+k*25; /* ptr to inv(Dk) */ 877 xp = x + k*5; 878 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 879 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 880 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 881 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 882 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 883 } 884 PetscFunctionReturn(0); 885 } 886 887 #undef __FUNCT__ 888 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering" 889 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) 890 { 891 const MatScalar *v; 892 PetscScalar *xp,x0,x1,x2,x3,x4; 893 PetscInt nz,k; 894 const PetscInt *vj; 895 896 PetscFunctionBegin; 897 for (k=mbs-1; k>=0; k--) { 898 v = aa + 25*ai[k]; 899 xp = x + k*5; 900 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; /* xk */ 901 nz = ai[k+1] - ai[k]; 902 vj = aj + ai[k]; 903 xp = x + (*vj)*5; 904 PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 905 PetscPrefetchBlock(v-25*nz,25*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 906 while (nz--) { 907 /* xk += U(k,:)*x(:) */ 908 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 909 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 910 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 911 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 912 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 913 vj++; 914 v += 25; xp = x + (*vj)*5; 915 } 916 xp = x + k*5; 917 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 918 } 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering_inplace" 924 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 925 { 926 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 927 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2 = a->bs2; 928 const MatScalar *aa=a->a; 929 PetscScalar *x; 930 const PetscScalar *b; 931 PetscErrorCode ierr; 932 933 PetscFunctionBegin; 934 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 935 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 936 937 /* solve U^T * D * y = b by forward substitution */ 938 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 939 ierr = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 940 941 /* solve U*x = y by back substitution */ 942 ierr = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 943 944 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 945 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 946 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 947 PetscFunctionReturn(0); 948 } 949 950 #undef __FUNCT__ 951 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace" 952 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 953 { 954 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 955 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 956 const MatScalar *aa=a->a; 957 PetscScalar *x; 958 const PetscScalar *b; 959 PetscErrorCode ierr; 960 961 PetscFunctionBegin; 962 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 963 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 964 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 965 ierr = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 966 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 967 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 968 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace" 974 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 975 { 976 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 977 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 978 const MatScalar *aa=a->a; 979 PetscScalar *x; 980 const PetscScalar *b; 981 PetscErrorCode ierr; 982 983 PetscFunctionBegin; 984 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 985 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 986 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 987 ierr = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 988 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 989 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 990 ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_inplace" 996 PetscErrorCode MatSolve_SeqSBAIJ_4_inplace(Mat A,Vec bb,Vec xx) 997 { 998 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 999 IS isrow=a->row; 1000 const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1001 PetscErrorCode ierr; 1002 const PetscInt *r,*vj; 1003 PetscInt nz,k,idx; 1004 const MatScalar *aa=a->a,*v,*diag; 1005 PetscScalar *x,x0,x1,x2,x3,*t,*tp; 1006 const PetscScalar *b; 1007 1008 PetscFunctionBegin; 1009 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1010 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1011 t = a->solve_work; 1012 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1013 1014 /* solve U^T * D * y = b by forward substitution */ 1015 tp = t; 1016 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1017 idx = 4*r[k]; 1018 tp[0] = b[idx]; 1019 tp[1] = b[idx+1]; 1020 tp[2] = b[idx+2]; 1021 tp[3] = b[idx+3]; 1022 tp += 4; 1023 } 1024 1025 for (k=0; k<mbs; k++) { 1026 v = aa + 16*ai[k]; 1027 vj = aj + ai[k]; 1028 tp = t + k*4; 1029 x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 1030 nz = ai[k+1] - ai[k]; 1031 1032 tp = t + (*vj)*4; 1033 while (nz--) { 1034 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1035 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1036 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1037 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1038 vj++; 1039 tp = t + (*vj)*4; 1040 v += 16; 1041 } 1042 1043 /* xk = inv(Dk)*(Dk*xk) */ 1044 diag = aa+k*16; /* ptr to inv(Dk) */ 1045 tp = t + k*4; 1046 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1047 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1048 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1049 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1050 } 1051 1052 /* solve U*x = y by back substitution */ 1053 for (k=mbs-1; k>=0; k--) { 1054 v = aa + 16*ai[k]; 1055 vj = aj + ai[k]; 1056 tp = t + k*4; 1057 x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 1058 nz = ai[k+1] - ai[k]; 1059 1060 tp = t + (*vj)*4; 1061 while (nz--) { 1062 /* xk += U(k,:)*x(:) */ 1063 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 1064 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 1065 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 1066 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 1067 vj++; 1068 tp = t + (*vj)*4; 1069 v += 16; 1070 } 1071 tp = t + k*4; 1072 tp[0] =x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 1073 idx = 4*r[k]; 1074 x[idx] = x0; 1075 x[idx+1] = x1; 1076 x[idx+2] = x2; 1077 x[idx+3] = x3; 1078 } 1079 1080 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1081 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1082 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1083 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering" 1089 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) 1090 { 1091 const MatScalar *v,*diag; 1092 PetscScalar *xp,x0,x1,x2,x3; 1093 PetscInt nz,k; 1094 const PetscInt *vj; 1095 1096 PetscFunctionBegin; 1097 for (k=0; k<mbs; k++) { 1098 v = aa + 16*ai[k]; 1099 xp = x + k*4; 1100 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 1101 nz = ai[k+1] - ai[k]; 1102 vj = aj + ai[k]; 1103 xp = x + (*vj)*4; 1104 PetscPrefetchBlock(vj+nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 1105 PetscPrefetchBlock(v+16*nz,16*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 1106 while (nz--) { 1107 /* x(:) += U(k,:)^T*(Dk*xk) */ 1108 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1109 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1110 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1111 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1112 vj++; 1113 xp = x + (*vj)*4; 1114 v += 16; 1115 } 1116 /* xk = inv(Dk)*(Dk*xk) */ 1117 diag = aa+k*16; /* ptr to inv(Dk) */ 1118 xp = x + k*4; 1119 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1120 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1121 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1122 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1123 } 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering" 1129 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) 1130 { 1131 const MatScalar *v; 1132 PetscScalar *xp,x0,x1,x2,x3; 1133 PetscInt nz,k; 1134 const PetscInt *vj; 1135 1136 PetscFunctionBegin; 1137 for (k=mbs-1; k>=0; k--) { 1138 v = aa + 16*ai[k]; 1139 xp = x + k*4; 1140 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 1141 nz = ai[k+1] - ai[k]; 1142 vj = aj + ai[k]; 1143 xp = x + (*vj)*4; 1144 PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 1145 PetscPrefetchBlock(v-16*nz,16*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 1146 while (nz--) { 1147 /* xk += U(k,:)*x(:) */ 1148 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 1149 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 1150 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 1151 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 1152 vj++; 1153 v += 16; xp = x + (*vj)*4; 1154 } 1155 xp = x + k*4; 1156 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 1157 } 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering_inplace" 1163 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1164 { 1165 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 1166 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1167 const MatScalar *aa=a->a; 1168 PetscScalar *x; 1169 const PetscScalar *b; 1170 PetscErrorCode ierr; 1171 1172 PetscFunctionBegin; 1173 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1175 1176 /* solve U^T * D * y = b by forward substitution */ 1177 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1178 ierr = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1179 1180 /* solve U*x = y by back substitution */ 1181 ierr = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1182 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1183 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1184 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace" 1190 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1191 { 1192 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 1193 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1194 const MatScalar *aa=a->a; 1195 PetscScalar *x; 1196 const PetscScalar *b; 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1201 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1202 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1203 ierr = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1204 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1205 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1206 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace" 1212 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1213 { 1214 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 1215 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 1216 const MatScalar *aa=a->a; 1217 PetscScalar *x; 1218 const PetscScalar *b; 1219 PetscErrorCode ierr; 1220 1221 PetscFunctionBegin; 1222 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1223 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1224 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1225 ierr = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1226 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1227 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1228 ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_inplace" 1234 PetscErrorCode MatSolve_SeqSBAIJ_3_inplace(Mat A,Vec bb,Vec xx) 1235 { 1236 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 1237 IS isrow=a->row; 1238 const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1239 PetscErrorCode ierr; 1240 const PetscInt *r; 1241 PetscInt nz,k,idx; 1242 const PetscInt *vj; 1243 const MatScalar *aa=a->a,*v,*diag; 1244 PetscScalar *x,x0,x1,x2,*t,*tp; 1245 const PetscScalar *b; 1246 1247 PetscFunctionBegin; 1248 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1249 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1250 t = a->solve_work; 1251 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1252 1253 /* solve U^T * D * y = b by forward substitution */ 1254 tp = t; 1255 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1256 idx = 3*r[k]; 1257 tp[0] = b[idx]; 1258 tp[1] = b[idx+1]; 1259 tp[2] = b[idx+2]; 1260 tp += 3; 1261 } 1262 1263 for (k=0; k<mbs; k++) { 1264 v = aa + 9*ai[k]; 1265 vj = aj + ai[k]; 1266 tp = t + k*3; 1267 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 1268 nz = ai[k+1] - ai[k]; 1269 1270 tp = t + (*vj)*3; 1271 while (nz--) { 1272 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1273 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1274 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1275 vj++; 1276 tp = t + (*vj)*3; 1277 v += 9; 1278 } 1279 1280 /* xk = inv(Dk)*(Dk*xk) */ 1281 diag = aa+k*9; /* ptr to inv(Dk) */ 1282 tp = t + k*3; 1283 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1284 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1285 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1286 } 1287 1288 /* solve U*x = y by back substitution */ 1289 for (k=mbs-1; k>=0; k--) { 1290 v = aa + 9*ai[k]; 1291 vj = aj + ai[k]; 1292 tp = t + k*3; 1293 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 1294 nz = ai[k+1] - ai[k]; 1295 1296 tp = t + (*vj)*3; 1297 while (nz--) { 1298 /* xk += U(k,:)*x(:) */ 1299 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 1300 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 1301 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 1302 vj++; 1303 tp = t + (*vj)*3; 1304 v += 9; 1305 } 1306 tp = t + k*3; 1307 tp[0] = x0; tp[1] = x1; tp[2] = x2; 1308 idx = 3*r[k]; 1309 x[idx] = x0; 1310 x[idx+1] = x1; 1311 x[idx+2] = x2; 1312 } 1313 1314 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1315 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1316 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1317 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering" 1323 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) 1324 { 1325 const MatScalar *v,*diag; 1326 PetscScalar *xp,x0,x1,x2; 1327 PetscInt nz,k; 1328 const PetscInt *vj; 1329 1330 PetscFunctionBegin; 1331 for (k=0; k<mbs; k++) { 1332 v = aa + 9*ai[k]; 1333 xp = x + k*3; 1334 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1335 nz = ai[k+1] - ai[k]; 1336 vj = aj + ai[k]; 1337 xp = x + (*vj)*3; 1338 PetscPrefetchBlock(vj+nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 1339 PetscPrefetchBlock(v+9*nz,9*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 1340 while (nz--) { 1341 /* x(:) += U(k,:)^T*(Dk*xk) */ 1342 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1343 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1344 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1345 vj++; 1346 xp = x + (*vj)*3; 1347 v += 9; 1348 } 1349 /* xk = inv(Dk)*(Dk*xk) */ 1350 diag = aa+k*9; /* ptr to inv(Dk) */ 1351 xp = x + k*3; 1352 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1353 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1354 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1355 } 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering" 1361 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) 1362 { 1363 const MatScalar *v; 1364 PetscScalar *xp,x0,x1,x2; 1365 PetscInt nz,k; 1366 const PetscInt *vj; 1367 1368 PetscFunctionBegin; 1369 for (k=mbs-1; k>=0; k--) { 1370 v = aa + 9*ai[k]; 1371 xp = x + k*3; 1372 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1373 nz = ai[k+1] - ai[k]; 1374 vj = aj + ai[k]; 1375 xp = x + (*vj)*3; 1376 PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 1377 PetscPrefetchBlock(v-9*nz,9*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 1378 while (nz--) { 1379 /* xk += U(k,:)*x(:) */ 1380 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1381 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1382 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1383 vj++; 1384 v += 9; xp = x + (*vj)*3; 1385 } 1386 xp = x + k*3; 1387 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1388 } 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering_inplace" 1394 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1395 { 1396 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 1397 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1398 const MatScalar *aa=a->a; 1399 PetscScalar *x; 1400 const PetscScalar *b; 1401 PetscErrorCode ierr; 1402 1403 PetscFunctionBegin; 1404 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1405 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1406 1407 /* solve U^T * D * y = b by forward substitution */ 1408 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1409 ierr = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1410 1411 /* solve U*x = y by back substitution */ 1412 ierr = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1413 1414 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1415 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1416 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace" 1422 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1423 { 1424 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 1425 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1426 const MatScalar *aa=a->a; 1427 PetscScalar *x; 1428 const PetscScalar *b; 1429 PetscErrorCode ierr; 1430 1431 PetscFunctionBegin; 1432 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1433 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1434 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1435 ierr = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1436 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1437 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1438 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace" 1444 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1445 { 1446 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 1447 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 1448 const MatScalar *aa=a->a; 1449 PetscScalar *x; 1450 const PetscScalar *b; 1451 PetscErrorCode ierr; 1452 1453 PetscFunctionBegin; 1454 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1455 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1456 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1457 ierr = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1458 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1459 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1460 ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_inplace" 1466 PetscErrorCode MatSolve_SeqSBAIJ_2_inplace(Mat A,Vec bb,Vec xx) 1467 { 1468 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 1469 IS isrow=a->row; 1470 const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1471 PetscErrorCode ierr; 1472 const PetscInt *r,*vj; 1473 PetscInt nz,k,k2,idx; 1474 const MatScalar *aa=a->a,*v,*diag; 1475 PetscScalar *x,x0,x1,*t; 1476 const PetscScalar *b; 1477 1478 PetscFunctionBegin; 1479 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1480 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1481 t = a->solve_work; 1482 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1483 1484 /* solve U^T * D * y = perm(b) by forward substitution */ 1485 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1486 idx = 2*r[k]; 1487 t[k*2] = b[idx]; 1488 t[k*2+1] = b[idx+1]; 1489 } 1490 for (k=0; k<mbs; k++) { 1491 v = aa + 4*ai[k]; 1492 vj = aj + ai[k]; 1493 k2 = k*2; 1494 x0 = t[k2]; x1 = t[k2+1]; 1495 nz = ai[k+1] - ai[k]; 1496 while (nz--) { 1497 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1498 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1499 vj++; v += 4; 1500 } 1501 diag = aa+k*4; /* ptr to inv(Dk) */ 1502 t[k2] = diag[0]*x0 + diag[2]*x1; 1503 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1504 } 1505 1506 /* solve U*x = y by back substitution */ 1507 for (k=mbs-1; k>=0; k--) { 1508 v = aa + 4*ai[k]; 1509 vj = aj + ai[k]; 1510 k2 = k*2; 1511 x0 = t[k2]; x1 = t[k2+1]; 1512 nz = ai[k+1] - ai[k]; 1513 while (nz--) { 1514 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1515 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1516 vj++; 1517 v += 4; 1518 } 1519 t[k2] = x0; 1520 t[k2+1] = x1; 1521 idx = 2*r[k]; 1522 x[idx] = x0; 1523 x[idx+1] = x1; 1524 } 1525 1526 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1527 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1528 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1529 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering" 1535 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) 1536 { 1537 const MatScalar *v,*diag; 1538 PetscScalar x0,x1; 1539 PetscInt nz,k,k2; 1540 const PetscInt *vj; 1541 1542 PetscFunctionBegin; 1543 for (k=0; k<mbs; k++) { 1544 v = aa + 4*ai[k]; 1545 vj = aj + ai[k]; 1546 k2 = k*2; 1547 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1548 nz = ai[k+1] - ai[k]; 1549 PetscPrefetchBlock(vj+nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 1550 PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 1551 while (nz--) { 1552 /* x(:) += U(k,:)^T*(Dk*xk) */ 1553 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1554 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1555 vj++; v += 4; 1556 } 1557 /* xk = inv(Dk)*(Dk*xk) */ 1558 diag = aa+k*4; /* ptr to inv(Dk) */ 1559 x[k2] = diag[0]*x0 + diag[2]*x1; 1560 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering" 1567 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) 1568 { 1569 const MatScalar *v; 1570 PetscScalar x0,x1; 1571 PetscInt nz,k,k2; 1572 const PetscInt *vj; 1573 1574 PetscFunctionBegin; 1575 for (k=mbs-1; k>=0; k--) { 1576 v = aa + 4*ai[k]; 1577 vj = aj + ai[k]; 1578 k2 = k*2; 1579 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1580 nz = ai[k+1] - ai[k]; 1581 PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 1582 PetscPrefetchBlock(v-4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 1583 while (nz--) { 1584 /* xk += U(k,:)*x(:) */ 1585 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1586 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1587 vj++; 1588 v += 4; 1589 } 1590 x[k2] = x0; 1591 x[k2+1] = x1; 1592 } 1593 PetscFunctionReturn(0); 1594 } 1595 1596 #undef __FUNCT__ 1597 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace" 1598 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1599 { 1600 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 1601 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1602 const MatScalar *aa=a->a; 1603 PetscScalar *x; 1604 const PetscScalar *b; 1605 PetscErrorCode ierr; 1606 1607 PetscFunctionBegin; 1608 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1609 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1610 1611 /* solve U^T * D * y = b by forward substitution */ 1612 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1613 ierr = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1614 1615 /* solve U*x = y by back substitution */ 1616 ierr = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1617 1618 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1619 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1620 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 1621 PetscFunctionReturn(0); 1622 } 1623 1624 #undef __FUNCT__ 1625 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace" 1626 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1627 { 1628 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 1629 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1630 const MatScalar *aa=a->a; 1631 PetscScalar *x; 1632 const PetscScalar *b; 1633 PetscErrorCode ierr; 1634 1635 PetscFunctionBegin; 1636 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1637 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1638 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1639 ierr = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1640 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1641 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1642 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 1643 PetscFunctionReturn(0); 1644 } 1645 1646 #undef __FUNCT__ 1647 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace" 1648 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1649 { 1650 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; 1651 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 1652 const MatScalar *aa=a->a; 1653 PetscScalar *x; 1654 const PetscScalar *b; 1655 PetscErrorCode ierr; 1656 1657 PetscFunctionBegin; 1658 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1659 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1660 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1661 ierr = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1662 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1663 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1664 ierr = PetscLogFlops(2.0*bs2*(a->nz - mbs));CHKERRQ(ierr); 1665 PetscFunctionReturn(0); 1666 } 1667 1668 #undef __FUNCT__ 1669 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1670 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1671 { 1672 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1673 IS isrow=a->row; 1674 PetscErrorCode ierr; 1675 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj,*adiag = a->diag; 1676 const MatScalar *aa=a->a,*v; 1677 const PetscScalar *b; 1678 PetscScalar *x,xk,*t; 1679 PetscInt nz,k,j; 1680 1681 PetscFunctionBegin; 1682 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1683 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1684 t = a->solve_work; 1685 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1686 1687 /* solve U^T*D*y = perm(b) by forward substitution */ 1688 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1689 for (k=0; k<mbs; k++) { 1690 v = aa + ai[k]; 1691 vj = aj + ai[k]; 1692 xk = t[k]; 1693 nz = ai[k+1] - ai[k] - 1; 1694 for (j=0; j<nz; j++) t[vj[j]] += v[j]*xk; 1695 t[k] = xk*v[nz]; /* v[nz] = 1/D(k) */ 1696 } 1697 1698 /* solve U*perm(x) = y by back substitution */ 1699 for (k=mbs-1; k>=0; k--) { 1700 v = aa + adiag[k] - 1; 1701 vj = aj + adiag[k] - 1; 1702 nz = ai[k+1] - ai[k] - 1; 1703 for (j=0; j<nz; j++) t[k] += v[-j]*t[vj[-j]]; 1704 x[rp[k]] = t[k]; 1705 } 1706 1707 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1708 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1709 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1710 ierr = PetscLogFlops(4.0*a->nz - 3.0*mbs);CHKERRQ(ierr); 1711 PetscFunctionReturn(0); 1712 } 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_inplace" 1716 PetscErrorCode MatSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx) 1717 { 1718 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1719 IS isrow=a->row; 1720 PetscErrorCode ierr; 1721 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1722 const MatScalar *aa=a->a,*v; 1723 PetscScalar *x,xk,*t; 1724 const PetscScalar *b; 1725 PetscInt nz,k; 1726 1727 PetscFunctionBegin; 1728 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1729 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1730 t = a->solve_work; 1731 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1732 1733 /* solve U^T*D*y = perm(b) by forward substitution */ 1734 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1735 for (k=0; k<mbs; k++) { 1736 v = aa + ai[k] + 1; 1737 vj = aj + ai[k] + 1; 1738 xk = t[k]; 1739 nz = ai[k+1] - ai[k] - 1; 1740 while (nz--) t[*vj++] += (*v++) * xk; 1741 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1742 } 1743 1744 /* solve U*perm(x) = y by back substitution */ 1745 for (k=mbs-1; k>=0; k--) { 1746 v = aa + ai[k] + 1; 1747 vj = aj + ai[k] + 1; 1748 nz = ai[k+1] - ai[k] - 1; 1749 while (nz--) t[k] += (*v++) * t[*vj++]; 1750 x[rp[k]] = t[k]; 1751 } 1752 1753 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1754 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1755 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1756 ierr = PetscLogFlops(4.0*a->nz - 3*mbs);CHKERRQ(ierr); 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1" 1762 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1763 { 1764 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1765 IS isrow=a->row; 1766 PetscErrorCode ierr; 1767 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj,*adiag = a->diag; 1768 const MatScalar *aa=a->a,*v; 1769 PetscReal diagk; 1770 PetscScalar *x,xk; 1771 const PetscScalar *b; 1772 PetscInt nz,k; 1773 1774 PetscFunctionBegin; 1775 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ 1776 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1777 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1778 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1779 1780 for (k=0; k<mbs; k++) x[k] = b[rp[k]]; 1781 for (k=0; k<mbs; k++) { 1782 v = aa + ai[k]; 1783 vj = aj + ai[k]; 1784 xk = x[k]; 1785 nz = ai[k+1] - ai[k] - 1; 1786 while (nz--) x[*vj++] += (*v++) * xk; 1787 1788 diagk = PetscRealPart(aa[adiag[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1789 if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1790 x[k] = xk*PetscSqrtReal(diagk); 1791 } 1792 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1793 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1794 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1795 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 1796 PetscFunctionReturn(0); 1797 } 1798 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_inplace" 1801 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx) 1802 { 1803 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1804 IS isrow=a->row; 1805 PetscErrorCode ierr; 1806 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1807 const MatScalar *aa=a->a,*v; 1808 PetscReal diagk; 1809 PetscScalar *x,xk; 1810 const PetscScalar *b; 1811 PetscInt nz,k; 1812 1813 PetscFunctionBegin; 1814 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ 1815 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1816 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1817 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1818 1819 for (k=0; k<mbs; k++) x[k] = b[rp[k]]; 1820 for (k=0; k<mbs; k++) { 1821 v = aa + ai[k] + 1; 1822 vj = aj + ai[k] + 1; 1823 xk = x[k]; 1824 nz = ai[k+1] - ai[k] - 1; 1825 while (nz--) x[*vj++] += (*v++) * xk; 1826 1827 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1828 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1829 x[k] = xk*PetscSqrtReal(diagk); 1830 } 1831 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1832 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1833 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1834 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 1835 PetscFunctionReturn(0); 1836 } 1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1" 1840 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1841 { 1842 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1843 IS isrow=a->row; 1844 PetscErrorCode ierr; 1845 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj,*adiag = a->diag; 1846 const MatScalar *aa=a->a,*v; 1847 PetscReal diagk; 1848 PetscScalar *x,*t; 1849 const PetscScalar *b; 1850 PetscInt nz,k; 1851 1852 PetscFunctionBegin; 1853 /* solve D^(1/2)*U*perm(x) = b by back substitution */ 1854 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1855 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1856 t = a->solve_work; 1857 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1858 1859 for (k=mbs-1; k>=0; k--) { 1860 v = aa + ai[k]; 1861 vj = aj + ai[k]; 1862 diagk = PetscRealPart(aa[adiag[k]]); 1863 if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1864 t[k] = b[k] * PetscSqrtReal(diagk); 1865 nz = ai[k+1] - ai[k] - 1; 1866 while (nz--) t[k] += (*v++) * t[*vj++]; 1867 x[rp[k]] = t[k]; 1868 } 1869 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1870 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1871 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1872 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_inplace" 1878 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx) 1879 { 1880 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1881 IS isrow=a->row; 1882 PetscErrorCode ierr; 1883 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1884 const MatScalar *aa=a->a,*v; 1885 PetscReal diagk; 1886 PetscScalar *x,*t; 1887 const PetscScalar *b; 1888 PetscInt nz,k; 1889 1890 PetscFunctionBegin; 1891 /* solve D^(1/2)*U*perm(x) = b by back substitution */ 1892 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1893 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1894 t = a->solve_work; 1895 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1896 1897 for (k=mbs-1; k>=0; k--) { 1898 v = aa + ai[k] + 1; 1899 vj = aj + ai[k] + 1; 1900 diagk = PetscRealPart(aa[ai[k]]); 1901 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1902 t[k] = b[k] * PetscSqrtReal(diagk); 1903 nz = ai[k+1] - ai[k] - 1; 1904 while (nz--) t[k] += (*v++) * t[*vj++]; 1905 x[rp[k]] = t[k]; 1906 } 1907 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1908 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1909 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1910 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1916 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1917 { 1918 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1919 PetscErrorCode ierr; 1920 1921 PetscFunctionBegin; 1922 if (A->rmap->bs == 1) { 1923 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1924 } else { 1925 IS isrow=a->row; 1926 const PetscInt *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1927 const MatScalar *aa=a->a,*v; 1928 PetscScalar *x,*t; 1929 const PetscScalar *b; 1930 PetscInt nz,k,n,i,j; 1931 1932 if (bb->n > a->solves_work_n) { 1933 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1934 ierr = PetscMalloc1(bb->n*A->rmap->N,&a->solves_work);CHKERRQ(ierr); 1935 a->solves_work_n = bb->n; 1936 } 1937 n = bb->n; 1938 ierr = VecGetArrayRead(bb->v,&b);CHKERRQ(ierr); 1939 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1940 t = a->solves_work; 1941 1942 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1943 1944 /* solve U^T*D*y = perm(b) by forward substitution */ 1945 for (k=0; k<mbs; k++) { 1946 for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs]; /* values are stored interlaced in t */ 1947 } 1948 for (k=0; k<mbs; k++) { 1949 v = aa + ai[k]; 1950 vj = aj + ai[k]; 1951 nz = ai[k+1] - ai[k] - 1; 1952 for (j=0; j<nz; j++) { 1953 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1954 v++;vj++; 1955 } 1956 for (i=0; i<n; i++) t[n*k+i] *= aa[nz]; /* note: aa[nz] = 1/D(k) */ 1957 } 1958 1959 /* solve U*perm(x) = y by back substitution */ 1960 for (k=mbs-1; k>=0; k--) { 1961 v = aa + ai[k] - 1; 1962 vj = aj + ai[k] - 1; 1963 nz = ai[k+1] - ai[k] - 1; 1964 for (j=0; j<nz; j++) { 1965 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1966 v++;vj++; 1967 } 1968 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i]; 1969 } 1970 1971 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1972 ierr = VecRestoreArrayRead(bb->v,&b);CHKERRQ(ierr); 1973 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1974 ierr = PetscLogFlops(bb->n*(4.0*a->nz - 3.0*mbs));CHKERRQ(ierr); 1975 } 1976 PetscFunctionReturn(0); 1977 } 1978 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatSolves_SeqSBAIJ_1_inplace" 1981 PetscErrorCode MatSolves_SeqSBAIJ_1_inplace(Mat A,Vecs bb,Vecs xx) 1982 { 1983 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1984 PetscErrorCode ierr; 1985 1986 PetscFunctionBegin; 1987 if (A->rmap->bs == 1) { 1988 ierr = MatSolve_SeqSBAIJ_1_inplace(A,bb->v,xx->v);CHKERRQ(ierr); 1989 } else { 1990 IS isrow=a->row; 1991 const PetscInt *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1992 const MatScalar *aa=a->a,*v; 1993 PetscScalar *x,*t; 1994 const PetscScalar *b; 1995 PetscInt nz,k,n,i; 1996 1997 if (bb->n > a->solves_work_n) { 1998 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1999 ierr = PetscMalloc1(bb->n*A->rmap->N,&a->solves_work);CHKERRQ(ierr); 2000 a->solves_work_n = bb->n; 2001 } 2002 n = bb->n; 2003 ierr = VecGetArrayRead(bb->v,&b);CHKERRQ(ierr); 2004 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 2005 t = a->solves_work; 2006 2007 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 2008 2009 /* solve U^T*D*y = perm(b) by forward substitution */ 2010 for (k=0; k<mbs; k++) { 2011 for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs]; /* values are stored interlaced in t */ 2012 } 2013 for (k=0; k<mbs; k++) { 2014 v = aa + ai[k]; 2015 vj = aj + ai[k]; 2016 nz = ai[k+1] - ai[k]; 2017 while (nz--) { 2018 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 2019 v++;vj++; 2020 } 2021 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 2022 } 2023 2024 /* solve U*perm(x) = y by back substitution */ 2025 for (k=mbs-1; k>=0; k--) { 2026 v = aa + ai[k]; 2027 vj = aj + ai[k]; 2028 nz = ai[k+1] - ai[k]; 2029 while (nz--) { 2030 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 2031 v++;vj++; 2032 } 2033 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i]; 2034 } 2035 2036 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 2037 ierr = VecRestoreArrayRead(bb->v,&b);CHKERRQ(ierr); 2038 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 2039 ierr = PetscLogFlops(bb->n*(4.0*a->nz - 3.0*mbs));CHKERRQ(ierr); 2040 } 2041 PetscFunctionReturn(0); 2042 } 2043 2044 #undef __FUNCT__ 2045 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 2046 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 2047 { 2048 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2049 PetscErrorCode ierr; 2050 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj,*adiag = a->diag; 2051 const MatScalar *aa=a->a,*v; 2052 const PetscScalar *b; 2053 PetscScalar *x,xi; 2054 PetscInt nz,i,j; 2055 2056 PetscFunctionBegin; 2057 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2058 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2059 2060 /* solve U^T*D*y = b by forward substitution */ 2061 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 2062 for (i=0; i<mbs; i++) { 2063 v = aa + ai[i]; 2064 vj = aj + ai[i]; 2065 xi = x[i]; 2066 nz = ai[i+1] - ai[i] - 1; /* exclude diag[i] */ 2067 for (j=0; j<nz; j++) x[vj[j]] += v[j]* xi; 2068 x[i] = xi*v[nz]; /* v[nz] = aa[diag[i]] = 1/D(i) */ 2069 } 2070 2071 /* solve U*x = y by backward substitution */ 2072 for (i=mbs-2; i>=0; i--) { 2073 xi = x[i]; 2074 v = aa + adiag[i] - 1; /* end of row i, excluding diag */ 2075 vj = aj + adiag[i] - 1; 2076 nz = ai[i+1] - ai[i] - 1; 2077 for (j=0; j<nz; j++) xi += v[-j]*x[vj[-j]]; 2078 x[i] = xi; 2079 } 2080 2081 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2082 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2083 ierr = PetscLogFlops(4.0*a->nz - 3*mbs);CHKERRQ(ierr); 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace" 2089 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 2090 { 2091 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2092 PetscErrorCode ierr; 2093 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj; 2094 const MatScalar *aa=a->a,*v; 2095 PetscScalar *x,xk; 2096 const PetscScalar *b; 2097 PetscInt nz,k; 2098 2099 PetscFunctionBegin; 2100 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2101 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2102 2103 /* solve U^T*D*y = b by forward substitution */ 2104 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 2105 for (k=0; k<mbs; k++) { 2106 v = aa + ai[k] + 1; 2107 vj = aj + ai[k] + 1; 2108 xk = x[k]; 2109 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 2110 while (nz--) x[*vj++] += (*v++) * xk; 2111 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 2112 } 2113 2114 /* solve U*x = y by back substitution */ 2115 for (k=mbs-2; k>=0; k--) { 2116 v = aa + ai[k] + 1; 2117 vj = aj + ai[k] + 1; 2118 xk = x[k]; 2119 nz = ai[k+1] - ai[k] - 1; 2120 while (nz--) { 2121 xk += (*v++) * x[*vj++]; 2122 } 2123 x[k] = xk; 2124 } 2125 2126 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2127 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2128 ierr = PetscLogFlops(4.0*a->nz - 3*mbs);CHKERRQ(ierr); 2129 PetscFunctionReturn(0); 2130 } 2131 2132 #undef __FUNCT__ 2133 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering" 2134 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 2135 { 2136 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2137 PetscErrorCode ierr; 2138 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*adiag = a->diag,*vj; 2139 const MatScalar *aa=a->a,*v; 2140 PetscReal diagk; 2141 PetscScalar *x; 2142 const PetscScalar *b; 2143 PetscInt nz,k; 2144 2145 PetscFunctionBegin; 2146 /* solve U^T*D^(1/2)*x = b by forward substitution */ 2147 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2148 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2149 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 2150 for (k=0; k<mbs; k++) { 2151 v = aa + ai[k]; 2152 vj = aj + ai[k]; 2153 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 2154 while (nz--) x[*vj++] += (*v++) * x[k]; 2155 diagk = PetscRealPart(aa[adiag[k]]); /* note: aa[adiag[k]] = 1/D(k) */ 2156 if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[adiag[k]]),PetscImaginaryPart(aa[adiag[k]])); 2157 x[k] *= PetscSqrtReal(diagk); 2158 } 2159 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2160 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2161 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 2162 PetscFunctionReturn(0); 2163 } 2164 2165 #undef __FUNCT__ 2166 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace" 2167 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 2168 { 2169 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2170 PetscErrorCode ierr; 2171 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj; 2172 const MatScalar *aa=a->a,*v; 2173 PetscReal diagk; 2174 PetscScalar *x; 2175 const PetscScalar *b; 2176 PetscInt nz,k; 2177 2178 PetscFunctionBegin; 2179 /* solve U^T*D^(1/2)*x = b by forward substitution */ 2180 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2181 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2182 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 2183 for (k=0; k<mbs; k++) { 2184 v = aa + ai[k] + 1; 2185 vj = aj + ai[k] + 1; 2186 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 2187 while (nz--) x[*vj++] += (*v++) * x[k]; 2188 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 2189 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]])); 2190 x[k] *= PetscSqrtReal(diagk); 2191 } 2192 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2193 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2194 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 2195 PetscFunctionReturn(0); 2196 } 2197 2198 #undef __FUNCT__ 2199 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering" 2200 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 2201 { 2202 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2203 PetscErrorCode ierr; 2204 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*adiag = a->diag,*vj; 2205 const MatScalar *aa=a->a,*v; 2206 PetscReal diagk; 2207 PetscScalar *x; 2208 const PetscScalar *b; 2209 PetscInt nz,k; 2210 2211 PetscFunctionBegin; 2212 /* solve D^(1/2)*U*x = b by back substitution */ 2213 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2214 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2215 2216 for (k=mbs-1; k>=0; k--) { 2217 v = aa + ai[k]; 2218 vj = aj + ai[k]; 2219 diagk = PetscRealPart(aa[adiag[k]]); /* note: aa[diag[k]] = 1/D(k) */ 2220 if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 2221 x[k] = PetscSqrtReal(diagk)*b[k]; 2222 nz = ai[k+1] - ai[k] - 1; 2223 while (nz--) x[k] += (*v++) * x[*vj++]; 2224 } 2225 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2226 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2227 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 2228 PetscFunctionReturn(0); 2229 } 2230 2231 #undef __FUNCT__ 2232 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace" 2233 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 2234 { 2235 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2236 PetscErrorCode ierr; 2237 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj; 2238 const MatScalar *aa=a->a,*v; 2239 PetscReal diagk; 2240 PetscScalar *x; 2241 const PetscScalar *b; 2242 PetscInt nz,k; 2243 2244 PetscFunctionBegin; 2245 /* solve D^(1/2)*U*x = b by back substitution */ 2246 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2247 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2248 2249 for (k=mbs-1; k>=0; k--) { 2250 v = aa + ai[k] + 1; 2251 vj = aj + ai[k] + 1; 2252 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 2253 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 2254 x[k] = PetscSqrtReal(diagk)*b[k]; 2255 nz = ai[k+1] - ai[k] - 1; 2256 while (nz--) x[k] += (*v++) * x[*vj++]; 2257 } 2258 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2259 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2260 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 2261 PetscFunctionReturn(0); 2262 } 2263 2264 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 2267 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat B,Mat A,IS perm,const MatFactorInfo *info) 2268 { 2269 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 2270 PetscErrorCode ierr; 2271 const PetscInt *rip,mbs = a->mbs,*ai,*aj; 2272 PetscInt *jutmp,bs = A->rmap->bs,bs2=a->bs2,i; 2273 PetscInt m,reallocs = 0,*levtmp; 2274 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 2275 PetscInt incrlev,*lev,shift,prow,nz; 2276 PetscReal f = info->fill,levels = info->levels; 2277 PetscBool perm_identity; 2278 2279 PetscFunctionBegin; 2280 /* check whether perm is the identity mapping */ 2281 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2282 2283 if (perm_identity) { 2284 a->permute = PETSC_FALSE; 2285 ai = a->i; aj = a->j; 2286 } else { /* non-trivial permutation */ 2287 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format"); 2288 a->permute = PETSC_TRUE; 2289 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2290 ai = a->inew; aj = a->jnew; 2291 } 2292 2293 /* initialization */ 2294 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2295 umax = (PetscInt)(f*ai[mbs] + 1); 2296 ierr = PetscMalloc1(umax,&lev);CHKERRQ(ierr); 2297 umax += mbs + 1; 2298 shift = mbs + 1; 2299 ierr = PetscMalloc1(mbs+1,&iu);CHKERRQ(ierr); 2300 ierr = PetscMalloc1(umax,&ju);CHKERRQ(ierr); 2301 iu[0] = mbs + 1; 2302 juidx = mbs + 1; 2303 /* prowl: linked list for pivot row */ 2304 ierr = PetscMalloc3(mbs,&prowl,mbs,&q,mbs,&levtmp);CHKERRQ(ierr); 2305 /* q: linked list for col index */ 2306 2307 for (i=0; i<mbs; i++) { 2308 prowl[i] = mbs; 2309 q[i] = 0; 2310 } 2311 2312 /* for each row k */ 2313 for (k=0; k<mbs; k++) { 2314 nzk = 0; 2315 q[k] = mbs; 2316 /* copy current row into linked list */ 2317 nz = ai[rip[k]+1] - ai[rip[k]]; 2318 j = ai[rip[k]]; 2319 while (nz--) { 2320 vj = rip[aj[j++]]; 2321 if (vj > k) { 2322 qm = k; 2323 do { 2324 m = qm; qm = q[m]; 2325 } while (qm < vj); 2326 if (qm == vj) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Duplicate entry in A\n"); 2327 nzk++; 2328 q[m] = vj; 2329 q[vj] = qm; 2330 levtmp[vj] = 0; /* initialize lev for nonzero element */ 2331 } 2332 } 2333 2334 /* modify nonzero structure of k-th row by computing fill-in 2335 for each row prow to be merged in */ 2336 prow = k; 2337 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 2338 2339 while (prow < k) { 2340 /* merge row prow into k-th row */ 2341 jmin = iu[prow] + 1; 2342 jmax = iu[prow+1]; 2343 qm = k; 2344 for (j=jmin; j<jmax; j++) { 2345 incrlev = lev[j-shift] + 1; 2346 if (incrlev > levels) continue; 2347 vj = ju[j]; 2348 do { 2349 m = qm; qm = q[m]; 2350 } while (qm < vj); 2351 if (qm != vj) { /* a fill */ 2352 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 2353 levtmp[vj] = incrlev; 2354 } else if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 2355 } 2356 prow = prowl[prow]; /* next pivot row */ 2357 } 2358 2359 /* add k to row list for first nonzero element in k-th row */ 2360 if (nzk > 1) { 2361 i = q[k]; /* col value of first nonzero element in k_th row of U */ 2362 prowl[k] = prowl[i]; prowl[i] = k; 2363 } 2364 iu[k+1] = iu[k] + nzk; 2365 2366 /* allocate more space to ju and lev if needed */ 2367 if (iu[k+1] > umax) { 2368 /* estimate how much additional space we will need */ 2369 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 2370 /* just double the memory each time */ 2371 maxadd = umax; 2372 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 2373 umax += maxadd; 2374 2375 /* allocate a longer ju */ 2376 ierr = PetscMalloc1(umax,&jutmp);CHKERRQ(ierr); 2377 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 2378 ierr = PetscFree(ju);CHKERRQ(ierr); 2379 ju = jutmp; 2380 2381 ierr = PetscMalloc1(umax,&jutmp);CHKERRQ(ierr); 2382 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 2383 ierr = PetscFree(lev);CHKERRQ(ierr); 2384 lev = jutmp; 2385 reallocs += 2; /* count how many times we realloc */ 2386 } 2387 2388 /* save nonzero structure of k-th row in ju */ 2389 i=k; 2390 while (nzk--) { 2391 i = q[i]; 2392 ju[juidx] = i; 2393 lev[juidx-shift] = levtmp[i]; 2394 juidx++; 2395 } 2396 } 2397 2398 #if defined(PETSC_USE_INFO) 2399 if (ai[mbs] != 0) { 2400 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2401 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 2402 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2403 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 2404 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2405 } else { 2406 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2407 } 2408 #endif 2409 2410 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2411 ierr = PetscFree3(prowl,q,levtmp);CHKERRQ(ierr); 2412 ierr = PetscFree(lev);CHKERRQ(ierr); 2413 2414 /* put together the new matrix */ 2415 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,NULL);CHKERRQ(ierr); 2416 2417 /* ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)iperm);CHKERRQ(ierr); */ 2418 b = (Mat_SeqSBAIJ*)(B)->data; 2419 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2420 2421 b->singlemalloc = PETSC_FALSE; 2422 b->free_a = PETSC_TRUE; 2423 b->free_ij = PETSC_TRUE; 2424 /* the next line frees the default space generated by the Create() */ 2425 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 2426 ierr = PetscMalloc1((iu[mbs]+1)*bs2,&b->a);CHKERRQ(ierr); 2427 b->j = ju; 2428 b->i = iu; 2429 b->diag = 0; 2430 b->ilen = 0; 2431 b->imax = 0; 2432 2433 ierr = ISDestroy(&b->row);CHKERRQ(ierr); 2434 ierr = ISDestroy(&b->icol);CHKERRQ(ierr); 2435 b->row = perm; 2436 b->icol = perm; 2437 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2438 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2439 ierr = PetscMalloc1(bs*mbs+bs,&b->solve_work);CHKERRQ(ierr); 2440 /* In b structure: Free imax, ilen, old a, old j. 2441 Allocate idnew, solve_work, new a, new j */ 2442 ierr = PetscLogObjectMemory((PetscObject)B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2443 b->maxnz = b->nz = iu[mbs]; 2444 2445 (B)->info.factor_mallocs = reallocs; 2446 (B)->info.fill_ratio_given = f; 2447 if (ai[mbs] != 0) { 2448 (B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2449 } else { 2450 (B)->info.fill_ratio_needed = 0.0; 2451 } 2452 ierr = MatSeqSBAIJSetNumericFactorization_inplace(B,perm_identity);CHKERRQ(ierr); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 /* 2457 See MatICCFactorSymbolic_SeqAIJ() for description of its data structure 2458 */ 2459 #include <petscbt.h> 2460 #include <../src/mat/utils/freespace.h> 2461 #undef __FUNCT__ 2462 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 2463 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2464 { 2465 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 2466 PetscErrorCode ierr; 2467 PetscBool perm_identity,free_ij = PETSC_TRUE,missing; 2468 PetscInt bs=A->rmap->bs,am=a->mbs,d,*ai=a->i,*aj= a->j; 2469 const PetscInt *rip; 2470 PetscInt reallocs=0,i,*ui,*udiag,*cols; 2471 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2472 PetscInt nlnk,*lnk,*lnk_lvl=NULL,ncols,*uj,**uj_ptr,**uj_lvl_ptr; 2473 PetscReal fill =info->fill,levels=info->levels; 2474 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2475 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2476 PetscBT lnkbt; 2477 2478 PetscFunctionBegin; 2479 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2480 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2481 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2482 if (bs > 1) { 2483 ierr = MatICCFactorSymbolic_SeqSBAIJ_inplace(fact,A,perm,info);CHKERRQ(ierr); 2484 PetscFunctionReturn(0); 2485 } 2486 2487 /* check whether perm is the identity mapping */ 2488 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2489 if (!perm_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format"); 2490 a->permute = PETSC_FALSE; 2491 2492 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2493 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2494 ui[0] = 0; 2495 2496 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2497 if (!levels) { 2498 /* reuse the column pointers and row offsets for memory savings */ 2499 for (i=0; i<am; i++) { 2500 ncols = ai[i+1] - ai[i]; 2501 ui[i+1] = ui[i] + ncols; 2502 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2503 } 2504 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2505 cols = uj; 2506 for (i=0; i<am; i++) { 2507 aj = a->j + ai[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2508 ncols = ai[i+1] - ai[i] -1; 2509 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2510 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2511 } 2512 } else { /* case: levels>0 */ 2513 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2514 2515 /* initialization */ 2516 /* jl: linked list for storing indices of the pivot rows 2517 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2518 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&il,am,&jl);CHKERRQ(ierr); 2519 for (i=0; i<am; i++) { 2520 jl[i] = am; il[i] = 0; 2521 } 2522 2523 /* create and initialize a linked list for storing column indices of the active row k */ 2524 nlnk = am + 1; 2525 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2526 2527 /* initial FreeSpace size is fill*(ai[am]+1) */ 2528 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2529 2530 current_space = free_space; 2531 2532 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2533 2534 current_space_lvl = free_space_lvl; 2535 2536 for (k=0; k<am; k++) { /* for each active row k */ 2537 /* initialize lnk by the column indices of row k */ 2538 nzk = 0; 2539 ncols = ai[k+1] - ai[k]; 2540 if (!ncols) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row %D in matrix ",k); 2541 cols = aj+ai[k]; 2542 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2543 nzk += nlnk; 2544 2545 /* update lnk by computing fill-in for each pivot row to be merged in */ 2546 prow = jl[k]; /* 1st pivot row */ 2547 2548 while (prow < k) { 2549 nextprow = jl[prow]; 2550 2551 /* merge prow into k-th row */ 2552 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2553 jmax = ui[prow+1]; 2554 ncols = jmax-jmin; 2555 i = jmin - ui[prow]; 2556 2557 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2558 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2559 j = *(uj - 1); 2560 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2561 nzk += nlnk; 2562 2563 /* update il and jl for prow */ 2564 if (jmin < jmax) { 2565 il[prow] = jmin; 2566 2567 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2568 } 2569 prow = nextprow; 2570 } 2571 2572 /* if free space is not available, make more free space */ 2573 if (current_space->local_remaining<nzk) { 2574 i = am - k + 1; /* num of unfactored rows */ 2575 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2576 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2577 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2578 reallocs++; 2579 } 2580 2581 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2582 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2583 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2584 2585 /* add the k-th row into il and jl */ 2586 if (nzk > 1) { 2587 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2588 jl[k] = jl[i]; jl[i] = k; 2589 il[k] = ui[k] + 1; 2590 } 2591 uj_ptr[k] = current_space->array; 2592 uj_lvl_ptr[k] = current_space_lvl->array; 2593 2594 current_space->array += nzk; 2595 current_space->local_used += nzk; 2596 current_space->local_remaining -= nzk; 2597 current_space_lvl->array += nzk; 2598 current_space_lvl->local_used += nzk; 2599 current_space_lvl->local_remaining -= nzk; 2600 2601 ui[k+1] = ui[k] + nzk; 2602 } 2603 2604 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2605 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,il,jl);CHKERRQ(ierr); 2606 2607 /* destroy list of free space and other temporary array(s) */ 2608 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2609 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2610 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2611 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2612 2613 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2614 2615 /* put together the new matrix in MATSEQSBAIJ format */ 2616 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2617 2618 b = (Mat_SeqSBAIJ*)(fact)->data; 2619 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2620 2621 b->singlemalloc = PETSC_FALSE; 2622 b->free_a = PETSC_TRUE; 2623 b->free_ij = free_ij; 2624 2625 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2626 2627 b->j = uj; 2628 b->i = ui; 2629 b->diag = udiag; 2630 b->free_diag = PETSC_TRUE; 2631 b->ilen = 0; 2632 b->imax = 0; 2633 b->row = perm; 2634 b->col = perm; 2635 2636 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2637 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2638 2639 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2640 2641 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2642 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2643 2644 b->maxnz = b->nz = ui[am]; 2645 2646 fact->info.factor_mallocs = reallocs; 2647 fact->info.fill_ratio_given = fill; 2648 if (ai[am] != 0) { 2649 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/ai[am]; 2650 } else { 2651 fact->info.fill_ratio_needed = 0.0; 2652 } 2653 #if defined(PETSC_USE_INFO) 2654 if (ai[am] != 0) { 2655 PetscReal af = fact->info.fill_ratio_needed; 2656 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2657 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2658 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2659 } else { 2660 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2661 } 2662 #endif 2663 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 2664 PetscFunctionReturn(0); 2665 } 2666 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_inplace" 2669 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2670 { 2671 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2672 Mat_SeqSBAIJ *b; 2673 PetscErrorCode ierr; 2674 PetscBool perm_identity,free_ij = PETSC_TRUE; 2675 PetscInt bs=A->rmap->bs,am=a->mbs; 2676 const PetscInt *cols,*rip,*ai=a->i,*aj=a->j; 2677 PetscInt reallocs=0,i,*ui; 2678 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2679 PetscInt nlnk,*lnk,*lnk_lvl=NULL,ncols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 2680 PetscReal fill =info->fill,levels=info->levels,ratio_needed; 2681 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2682 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2683 PetscBT lnkbt; 2684 2685 PetscFunctionBegin; 2686 /* 2687 This code originally uses Modified Sparse Row (MSR) storage 2688 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 2689 Then it is rewritten so the factor B takes seqsbaij format. However the associated 2690 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 2691 thus the original code in MSR format is still used for these cases. 2692 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 2693 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 2694 */ 2695 if (bs > 1) { 2696 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr); 2697 PetscFunctionReturn(0); 2698 } 2699 2700 /* check whether perm is the identity mapping */ 2701 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2702 if (!perm_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format"); 2703 a->permute = PETSC_FALSE; 2704 2705 /* special case that simply copies fill pattern */ 2706 if (!levels) { 2707 /* reuse the column pointers and row offsets for memory savings */ 2708 ui = a->i; 2709 uj = a->j; 2710 free_ij = PETSC_FALSE; 2711 ratio_needed = 1.0; 2712 } else { /* case: levels>0 */ 2713 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2714 2715 /* initialization */ 2716 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2717 ui[0] = 0; 2718 2719 /* jl: linked list for storing indices of the pivot rows 2720 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2721 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&il,am,&jl);CHKERRQ(ierr); 2722 for (i=0; i<am; i++) { 2723 jl[i] = am; il[i] = 0; 2724 } 2725 2726 /* create and initialize a linked list for storing column indices of the active row k */ 2727 nlnk = am + 1; 2728 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2729 2730 /* initial FreeSpace size is fill*(ai[am]+1) */ 2731 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2732 2733 current_space = free_space; 2734 2735 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2736 2737 current_space_lvl = free_space_lvl; 2738 2739 for (k=0; k<am; k++) { /* for each active row k */ 2740 /* initialize lnk by the column indices of row rip[k] */ 2741 nzk = 0; 2742 ncols = ai[rip[k]+1] - ai[rip[k]]; 2743 cols = aj+ai[rip[k]]; 2744 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2745 nzk += nlnk; 2746 2747 /* update lnk by computing fill-in for each pivot row to be merged in */ 2748 prow = jl[k]; /* 1st pivot row */ 2749 2750 while (prow < k) { 2751 nextprow = jl[prow]; 2752 2753 /* merge prow into k-th row */ 2754 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2755 jmax = ui[prow+1]; 2756 ncols = jmax-jmin; 2757 i = jmin - ui[prow]; 2758 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2759 j = *(uj_lvl_ptr[prow] + i - 1); 2760 cols_lvl = uj_lvl_ptr[prow]+i; 2761 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2762 nzk += nlnk; 2763 2764 /* update il and jl for prow */ 2765 if (jmin < jmax) { 2766 il[prow] = jmin; 2767 j = *cols; 2768 jl[prow] = jl[j]; 2769 jl[j] = prow; 2770 } 2771 prow = nextprow; 2772 } 2773 2774 /* if free space is not available, make more free space */ 2775 if (current_space->local_remaining<nzk) { 2776 i = am - k + 1; /* num of unfactored rows */ 2777 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2778 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2779 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2780 reallocs++; 2781 } 2782 2783 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2784 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2785 2786 /* add the k-th row into il and jl */ 2787 if (nzk-1 > 0) { 2788 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2789 jl[k] = jl[i]; jl[i] = k; 2790 il[k] = ui[k] + 1; 2791 } 2792 uj_ptr[k] = current_space->array; 2793 uj_lvl_ptr[k] = current_space_lvl->array; 2794 2795 current_space->array += nzk; 2796 current_space->local_used += nzk; 2797 current_space->local_remaining -= nzk; 2798 current_space_lvl->array += nzk; 2799 current_space_lvl->local_used += nzk; 2800 current_space_lvl->local_remaining -= nzk; 2801 2802 ui[k+1] = ui[k] + nzk; 2803 } 2804 2805 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2806 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,il,jl);CHKERRQ(ierr); 2807 2808 /* destroy list of free space and other temporary array(s) */ 2809 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2810 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2811 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2812 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2813 if (ai[am] != 0) { 2814 ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2815 } else { 2816 ratio_needed = 0.0; 2817 } 2818 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2819 2820 /* put together the new matrix in MATSEQSBAIJ format */ 2821 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2822 2823 b = (Mat_SeqSBAIJ*)(fact)->data; 2824 2825 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2826 2827 b->singlemalloc = PETSC_FALSE; 2828 b->free_a = PETSC_TRUE; 2829 b->free_ij = free_ij; 2830 2831 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2832 2833 b->j = uj; 2834 b->i = ui; 2835 b->diag = 0; 2836 b->ilen = 0; 2837 b->imax = 0; 2838 b->row = perm; 2839 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2840 2841 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2842 b->icol = perm; 2843 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2844 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2845 2846 b->maxnz = b->nz = ui[am]; 2847 2848 fact->info.factor_mallocs = reallocs; 2849 fact->info.fill_ratio_given = fill; 2850 fact->info.fill_ratio_needed = ratio_needed; 2851 #if defined(PETSC_USE_INFO) 2852 if (ai[am] != 0) { 2853 PetscReal af = fact->info.fill_ratio_needed; 2854 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2855 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2856 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2857 } else { 2858 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2859 } 2860 #endif 2861 if (perm_identity) { 2862 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace; 2863 } else { 2864 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace; 2865 } 2866 PetscFunctionReturn(0); 2867 } 2868 2869