/* Factorization code for SBAIJ format. */ #include <../src/mat/impls/sbaij/seq/sbaij.h> #include <../src/mat/impls/baij/seq/baij.h> #include #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_N_inplace" PetscErrorCode MatSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; IS isrow=a->row; PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j; PetscErrorCode ierr; const PetscInt *r; PetscInt nz,*vj,k,idx,k1; PetscInt bs =A->rmap->bs,bs2 = a->bs2; const MatScalar *aa=a->a,*v,*diag; PetscScalar *x,*xk,*xj,*xk_tmp,*t; const PetscScalar *b; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); t = a->solve_work; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); ierr = PetscMalloc1(bs,&xk_tmp);CHKERRQ(ierr); /* solve U^T * D * y = b by forward substitution */ xk = t; for (k=0; kk */ while (nz--) { /* x(:) += U(k,:)^T*(Dk*xk) */ PetscKernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ vj++; xj = t + (*vj)*bs; v += bs2; } /* xk = inv(Dk)*(Dk*xk) */ diag = aa+k*bs2; /* ptr to inv(Dk) */ PetscKernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ } /* solve U*x = y by back substitution */ for (k=mbs-1; k>=0; k--) { v = aa + bs2*ai[k]; xk = t + k*bs; /* xk */ nz = ai[k+1] - ai[k]; vj = aj + ai[k]; xj = t + (*vj)*bs; while (nz--) { /* xk += U(k,:)*x(:) */ PetscKernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ vj++; v += bs2; xj = t + (*vj)*bs; } idx = bs*r[k]; for (k1=0; k1nz -(bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_inplace" PetscErrorCode MatForwardSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx) { PetscFunctionBegin; SETERRQ(PETSC_COMM_SELF,1,"not implemented yet"); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_inplace" PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx) { PetscFunctionBegin; SETERRQ(PETSC_COMM_SELF,1,"not implemented yet"); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering" PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) { PetscErrorCode ierr; PetscInt nz,k; const PetscInt *vj,bs2 = bs*bs; const MatScalar *v,*diag; PetscScalar *xk,*xj,*xk_tmp; PetscFunctionBegin; ierr = PetscMalloc1(bs,&xk_tmp);CHKERRQ(ierr); for (k=0; kk */ while (nz--) { /* x(:) += U(k,:)^T*(Dk*xk) */ PetscKernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ vj++; xj = x + (*vj)*bs; v += bs2; } /* xk = inv(Dk)*(Dk*xk) */ diag = aa+k*bs2; /* ptr to inv(Dk) */ PetscKernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ } ierr = PetscFree(xk_tmp);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering" PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) { PetscInt nz,k; const PetscInt *vj,bs2 = bs*bs; const MatScalar *v; PetscScalar *xk,*xj; PetscFunctionBegin; for (k=mbs-1; k>=0; k--) { v = aa + bs2*ai[k]; xk = x + k*bs; /* xk */ nz = ai[k+1] - ai[k]; vj = aj + ai[k]; xj = x + (*vj)*bs; while (nz--) { /* xk += U(k,:)*x(:) */ PetscKernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ vj++; v += bs2; xj = x + (*vj)*bs; } } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace" PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; PetscInt bs =A->rmap->bs; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); /* solve U^T * D * y = b by forward substitution */ ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ ierr = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); /* solve U*x = y by back substitution */ ierr = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace" PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; PetscInt bs =A->rmap->bs; const MatScalar *aa=a->a; const PetscScalar *b; PetscScalar *x; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ ierr = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace" PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; PetscInt bs =A->rmap->bs; const MatScalar *aa=a->a; const PetscScalar *b; PetscScalar *x; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_7_inplace" PetscErrorCode MatSolve_SeqSBAIJ_7_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; IS isrow=a->row; const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2,bs=A->rmap->bs,*r,*vj; PetscErrorCode ierr; PetscInt nz,k,idx; const MatScalar *aa=a->a,*v,*d; PetscScalar *x,x0,x1,x2,x3,x4,x5,x6,*t,*tp; const PetscScalar *b; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); t = a->solve_work; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); /* solve U^T * D * y = b by forward substitution */ tp = t; for (k=0; k=0; k--) { v = aa + 49*ai[k]; vj = aj + ai[k]; tp = t + k*7; x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ nz = ai[k+1] - ai[k]; tp = t + (*vj)*7; while (nz--) { /* xk += U(k,:)*x(:) */ x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; vj++; tp = t + (*vj)*7; v += 49; } tp = t + k*7; tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; idx = 7*r[k]; x[idx] = x0; x[idx+1] = x1; x[idx+2] = x2; x[idx+3] = x3; x[idx+4] = x4; x[idx+5] = x5; x[idx+6] = x6; } ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering" PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) { const MatScalar *v,*d; PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; PetscInt nz,k; const PetscInt *vj; PetscFunctionBegin; for (k=0; k=0; k--) { v = aa + 49*ai[k]; xp = x + k*7; x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ nz = ai[k+1] - ai[k]; vj = aj + ai[k]; xp = x + (*vj)*7; PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v-49*nz,49*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ while (nz--) { /* xk += U(k,:)*x(:) */ x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; vj++; v += 49; xp = x + (*vj)*7; } xp = x + k*7; xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace" PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); /* solve U^T * D * y = b by forward substitution */ ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ ierr = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); /* solve U*x = y by back substitution */ ierr = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace" PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace" PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_6_inplace" PetscErrorCode MatSolve_SeqSBAIJ_6_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; IS isrow=a->row; const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2,*r,*vj; PetscErrorCode ierr; PetscInt nz,k,idx; const MatScalar *aa=a->a,*v,*d; PetscScalar *x,x0,x1,x2,x3,x4,x5,*t,*tp; const PetscScalar *b; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); t = a->solve_work; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); /* solve U^T * D * y = b by forward substitution */ tp = t; for (k=0; k=0; k--) { v = aa + 36*ai[k]; vj = aj + ai[k]; tp = t + k*6; x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ nz = ai[k+1] - ai[k]; tp = t + (*vj)*6; while (nz--) { /* xk += U(k,:)*x(:) */ x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; vj++; tp = t + (*vj)*6; v += 36; } tp = t + k*6; tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; idx = 6*r[k]; x[idx] = x0; x[idx+1] = x1; x[idx+2] = x2; x[idx+3] = x3; x[idx+4] = x4; x[idx+5] = x5; } ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering" PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) { const MatScalar *v,*d; PetscScalar *xp,x0,x1,x2,x3,x4,x5; PetscInt nz,k; const PetscInt *vj; PetscFunctionBegin; for (k=0; k=0; k--) { v = aa + 36*ai[k]; xp = x + k*6; x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ nz = ai[k+1] - ai[k]; vj = aj + ai[k]; xp = x + (*vj)*6; PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v-36*nz,36*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ while (nz--) { /* xk += U(k,:)*x(:) */ x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; vj++; v += 36; xp = x + (*vj)*6; } xp = x + k*6; xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace" PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); /* solve U^T * D * y = b by forward substitution */ ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ ierr = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); /* solve U*x = y by back substitution */ ierr = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace" PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ ierr = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace" PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ ierr = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*(a->nz - mbs));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_5_inplace" PetscErrorCode MatSolve_SeqSBAIJ_5_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; IS isrow=a->row; const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2 = a->bs2; PetscErrorCode ierr; const PetscInt *r,*vj; PetscInt nz,k,idx; const MatScalar *aa=a->a,*v,*diag; PetscScalar *x,x0,x1,x2,x3,x4,*t,*tp; const PetscScalar *b; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); t = a->solve_work; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); /* solve U^T * D * y = b by forward substitution */ tp = t; for (k=0; k=0; k--) { v = aa + 25*ai[k]; vj = aj + ai[k]; tp = t + k*5; x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; /* xk */ nz = ai[k+1] - ai[k]; tp = t + (*vj)*5; while (nz--) { /* xk += U(k,:)*x(:) */ x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; vj++; tp = t + (*vj)*5; v += 25; } tp = t + k*5; tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; idx = 5*r[k]; x[idx] = x0; x[idx+1] = x1; x[idx+2] = x2; x[idx+3] = x3; x[idx+4] = x4; } ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering" PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) { const MatScalar *v,*diag; PetscScalar *xp,x0,x1,x2,x3,x4; PetscInt nz,k; const PetscInt *vj; PetscFunctionBegin; for (k=0; k=0; k--) { v = aa + 25*ai[k]; xp = x + k*5; x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; /* xk */ nz = ai[k+1] - ai[k]; vj = aj + ai[k]; xp = x + (*vj)*5; PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v-25*nz,25*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ while (nz--) { /* xk += U(k,:)*x(:) */ x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; vj++; v += 25; xp = x + (*vj)*5; } xp = x + k*5; xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering_inplace" PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2 = a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); /* solve U^T * D * y = b by forward substitution */ ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ ierr = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); /* solve U*x = y by back substitution */ ierr = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace" PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ ierr = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace" PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_4_inplace" PetscErrorCode MatSolve_SeqSBAIJ_4_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; IS isrow=a->row; const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; PetscErrorCode ierr; const PetscInt *r,*vj; PetscInt nz,k,idx; const MatScalar *aa=a->a,*v,*diag; PetscScalar *x,x0,x1,x2,x3,*t,*tp; const PetscScalar *b; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); t = a->solve_work; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); /* solve U^T * D * y = b by forward substitution */ tp = t; for (k=0; k=0; k--) { v = aa + 16*ai[k]; vj = aj + ai[k]; tp = t + k*4; x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ nz = ai[k+1] - ai[k]; tp = t + (*vj)*4; while (nz--) { /* xk += U(k,:)*x(:) */ x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; vj++; tp = t + (*vj)*4; v += 16; } tp = t + k*4; tp[0] =x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; idx = 4*r[k]; x[idx] = x0; x[idx+1] = x1; x[idx+2] = x2; x[idx+3] = x3; } ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering" PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) { const MatScalar *v,*diag; PetscScalar *xp,x0,x1,x2,x3; PetscInt nz,k; const PetscInt *vj; PetscFunctionBegin; for (k=0; k=0; k--) { v = aa + 16*ai[k]; xp = x + k*4; x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ nz = ai[k+1] - ai[k]; vj = aj + ai[k]; xp = x + (*vj)*4; PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v-16*nz,16*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ while (nz--) { /* xk += U(k,:)*x(:) */ x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; vj++; v += 16; xp = x + (*vj)*4; } xp = x + k*4; xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering_inplace" PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); /* solve U^T * D * y = b by forward substitution */ ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ ierr = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); /* solve U*x = y by back substitution */ ierr = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace" PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ ierr = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace" PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_3_inplace" PetscErrorCode MatSolve_SeqSBAIJ_3_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; IS isrow=a->row; const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; PetscErrorCode ierr; const PetscInt *r; PetscInt nz,k,idx; const PetscInt *vj; const MatScalar *aa=a->a,*v,*diag; PetscScalar *x,x0,x1,x2,*t,*tp; const PetscScalar *b; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); t = a->solve_work; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); /* solve U^T * D * y = b by forward substitution */ tp = t; for (k=0; k=0; k--) { v = aa + 9*ai[k]; vj = aj + ai[k]; tp = t + k*3; x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ nz = ai[k+1] - ai[k]; tp = t + (*vj)*3; while (nz--) { /* xk += U(k,:)*x(:) */ x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; vj++; tp = t + (*vj)*3; v += 9; } tp = t + k*3; tp[0] = x0; tp[1] = x1; tp[2] = x2; idx = 3*r[k]; x[idx] = x0; x[idx+1] = x1; x[idx+2] = x2; } ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering" PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) { const MatScalar *v,*diag; PetscScalar *xp,x0,x1,x2; PetscInt nz,k; const PetscInt *vj; PetscFunctionBegin; for (k=0; k=0; k--) { v = aa + 9*ai[k]; xp = x + k*3; x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ nz = ai[k+1] - ai[k]; vj = aj + ai[k]; xp = x + (*vj)*3; PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v-9*nz,9*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ while (nz--) { /* xk += U(k,:)*x(:) */ x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; vj++; v += 9; xp = x + (*vj)*3; } xp = x + k*3; xp[0] = x0; xp[1] = x1; xp[2] = x2; } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering_inplace" PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); /* solve U^T * D * y = b by forward substitution */ ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); /* solve U*x = y by back substitution */ ierr = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace" PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace" PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_2_inplace" PetscErrorCode MatSolve_SeqSBAIJ_2_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; IS isrow=a->row; const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; PetscErrorCode ierr; const PetscInt *r,*vj; PetscInt nz,k,k2,idx; const MatScalar *aa=a->a,*v,*diag; PetscScalar *x,x0,x1,*t; const PetscScalar *b; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); t = a->solve_work; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); /* solve U^T * D * y = perm(b) by forward substitution */ for (k=0; k=0; k--) { v = aa + 4*ai[k]; vj = aj + ai[k]; k2 = k*2; x0 = t[k2]; x1 = t[k2+1]; nz = ai[k+1] - ai[k]; while (nz--) { x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; vj++; v += 4; } t[k2] = x0; t[k2+1] = x1; idx = 2*r[k]; x[idx] = x0; x[idx+1] = x1; } ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering" PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x) { const MatScalar *v,*diag; PetscScalar x0,x1; PetscInt nz,k,k2; const PetscInt *vj; PetscFunctionBegin; for (k=0; k=0; k--) { v = aa + 4*ai[k]; vj = aj + ai[k]; k2 = k*2; x0 = x[k2]; x1 = x[k2+1]; /* xk */ nz = ai[k+1] - ai[k]; PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v-4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ while (nz--) { /* xk += U(k,:)*x(:) */ x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; vj++; v += 4; } x[k2] = x0; x[k2+1] = x1; } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace" PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); /* solve U^T * D * y = b by forward substitution */ ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); /* solve U*x = y by back substitution */ ierr = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace" PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace" PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; const MatScalar *aa=a->a; PetscScalar *x; const PetscScalar *b; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*bs2*(a->nz - mbs));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_1" PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; IS isrow=a->row; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj,*adiag = a->diag; const MatScalar *aa=a->a,*v; const PetscScalar *b; PetscScalar *x,xk,*t; PetscInt nz,k,j; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); t = a->solve_work; ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); /* solve U^T*D*y = perm(b) by forward substitution */ for (k=0; k=0; k--) { v = aa + adiag[k] - 1; vj = aj + adiag[k] - 1; nz = ai[k+1] - ai[k] - 1; for (j=0; jnz - 3.0*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_1_inplace" PetscErrorCode MatSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; IS isrow=a->row; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; const MatScalar *aa=a->a,*v; PetscScalar *x,xk,*t; const PetscScalar *b; PetscInt nz,k; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); t = a->solve_work; ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); /* solve U^T*D*y = perm(b) by forward substitution */ for (k=0; k=0; k--) { v = aa + ai[k] + 1; vj = aj + ai[k] + 1; nz = ai[k+1] - ai[k] - 1; while (nz--) t[k] += (*v++) * t[*vj++]; x[rp[k]] = t[k]; } ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*a->nz - 3*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1" PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; IS isrow=a->row; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj,*adiag = a->diag; const MatScalar *aa=a->a,*v; PetscReal diagk; PetscScalar *x,xk; const PetscScalar *b; PetscInt nz,k; PetscFunctionBegin; /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); for (k=0; knz - mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_inplace" PetscErrorCode MatForwardSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; IS isrow=a->row; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; const MatScalar *aa=a->a,*v; PetscReal diagk; PetscScalar *x,xk; const PetscScalar *b; PetscInt nz,k; PetscFunctionBegin; /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); for (k=0; knz - mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1" PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; IS isrow=a->row; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj,*adiag = a->diag; const MatScalar *aa=a->a,*v; PetscReal diagk; PetscScalar *x,*t; const PetscScalar *b; PetscInt nz,k; PetscFunctionBegin; /* solve D^(1/2)*U*perm(x) = b by back substitution */ ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); t = a->solve_work; ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); for (k=mbs-1; k>=0; k--) { v = aa + ai[k]; vj = aj + ai[k]; diagk = PetscRealPart(aa[adiag[k]]); if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); t[k] = b[k] * PetscSqrtReal(diagk); nz = ai[k+1] - ai[k] - 1; while (nz--) t[k] += (*v++) * t[*vj++]; x[rp[k]] = t[k]; } ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_inplace" PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; IS isrow=a->row; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; const MatScalar *aa=a->a,*v; PetscReal diagk; PetscScalar *x,*t; const PetscScalar *b; PetscInt nz,k; PetscFunctionBegin; /* solve D^(1/2)*U*perm(x) = b by back substitution */ ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); t = a->solve_work; ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); for (k=mbs-1; k>=0; k--) { v = aa + ai[k] + 1; vj = aj + ai[k] + 1; diagk = PetscRealPart(aa[ai[k]]); if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); t[k] = b[k] * PetscSqrtReal(diagk); nz = ai[k+1] - ai[k] - 1; while (nz--) t[k] += (*v++) * t[*vj++]; x[rp[k]] = t[k]; } ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolves_SeqSBAIJ_1" PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; PetscFunctionBegin; if (A->rmap->bs == 1) { ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); } else { IS isrow=a->row; const PetscInt *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; const MatScalar *aa=a->a,*v; PetscScalar *x,*t; const PetscScalar *b; PetscInt nz,k,n,i,j; if (bb->n > a->solves_work_n) { ierr = PetscFree(a->solves_work);CHKERRQ(ierr); ierr = PetscMalloc1(bb->n*A->rmap->N,&a->solves_work);CHKERRQ(ierr); a->solves_work_n = bb->n; } n = bb->n; ierr = VecGetArrayRead(bb->v,&b);CHKERRQ(ierr); ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); t = a->solves_work; ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); /* solve U^T*D*y = perm(b) by forward substitution */ for (k=0; k=0; k--) { v = aa + ai[k] - 1; vj = aj + ai[k] - 1; nz = ai[k+1] - ai[k] - 1; for (j=0; jv,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); ierr = PetscLogFlops(bb->n*(4.0*a->nz - 3.0*mbs));CHKERRQ(ierr); } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolves_SeqSBAIJ_1_inplace" PetscErrorCode MatSolves_SeqSBAIJ_1_inplace(Mat A,Vecs bb,Vecs xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; PetscFunctionBegin; if (A->rmap->bs == 1) { ierr = MatSolve_SeqSBAIJ_1_inplace(A,bb->v,xx->v);CHKERRQ(ierr); } else { IS isrow=a->row; const PetscInt *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; const MatScalar *aa=a->a,*v; PetscScalar *x,*t; const PetscScalar *b; PetscInt nz,k,n,i; if (bb->n > a->solves_work_n) { ierr = PetscFree(a->solves_work);CHKERRQ(ierr); ierr = PetscMalloc1(bb->n*A->rmap->N,&a->solves_work);CHKERRQ(ierr); a->solves_work_n = bb->n; } n = bb->n; ierr = VecGetArrayRead(bb->v,&b);CHKERRQ(ierr); ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); t = a->solves_work; ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); /* solve U^T*D*y = perm(b) by forward substitution */ for (k=0; k=0; k--) { v = aa + ai[k]; vj = aj + ai[k]; nz = ai[k+1] - ai[k]; while (nz--) { for (i=0; iv,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); ierr = PetscLogFlops(bb->n*(4.0*a->nz - 3.0*mbs));CHKERRQ(ierr); } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj,*adiag = a->diag; const MatScalar *aa=a->a,*v; const PetscScalar *b; PetscScalar *x,xi; PetscInt nz,i,j; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); /* solve U^T*D*y = b by forward substitution */ ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); for (i=0; i=0; i--) { xi = x[i]; v = aa + adiag[i] - 1; /* end of row i, excluding diag */ vj = aj + adiag[i] - 1; nz = ai[i+1] - ai[i] - 1; for (j=0; jnz - 3*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace" PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj; const MatScalar *aa=a->a,*v; PetscScalar *x,xk; const PetscScalar *b; PetscInt nz,k; PetscFunctionBegin; ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); /* solve U^T*D*y = b by forward substitution */ ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); for (k=0; k=0; k--) { v = aa + ai[k] + 1; vj = aj + ai[k] + 1; xk = x[k]; nz = ai[k+1] - ai[k] - 1; while (nz--) { xk += (*v++) * x[*vj++]; } x[k] = xk; } ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(4.0*a->nz - 3*mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering" PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*adiag = a->diag,*vj; const MatScalar *aa=a->a,*v; PetscReal diagk; PetscScalar *x; const PetscScalar *b; PetscInt nz,k; PetscFunctionBegin; /* solve U^T*D^(1/2)*x = b by forward substitution */ ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); for (k=0; knz - mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace" PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj; const MatScalar *aa=a->a,*v; PetscReal diagk; PetscScalar *x; const PetscScalar *b; PetscInt nz,k; PetscFunctionBegin; /* solve U^T*D^(1/2)*x = b by forward substitution */ ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); for (k=0; knz - mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering" PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*adiag = a->diag,*vj; const MatScalar *aa=a->a,*v; PetscReal diagk; PetscScalar *x; const PetscScalar *b; PetscInt nz,k; PetscFunctionBegin; /* solve D^(1/2)*U*x = b by back substitution */ ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); for (k=mbs-1; k>=0; k--) { v = aa + ai[k]; vj = aj + ai[k]; diagk = PetscRealPart(aa[adiag[k]]); /* note: aa[diag[k]] = 1/D(k) */ if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); x[k] = PetscSqrtReal(diagk)*b[k]; nz = ai[k+1] - ai[k] - 1; while (nz--) x[k] += (*v++) * x[*vj++]; } ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace" PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj; const MatScalar *aa=a->a,*v; PetscReal diagk; PetscScalar *x; const PetscScalar *b; PetscInt nz,k; PetscFunctionBegin; /* solve D^(1/2)*U*x = b by back substitution */ ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecGetArray(xx,&x);CHKERRQ(ierr); for (k=mbs-1; k>=0; k--) { v = aa + ai[k] + 1; vj = aj + ai[k] + 1; diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); x[k] = PetscSqrtReal(diagk)*b[k]; nz = ai[k+1] - ai[k] - 1; while (nz--) x[k] += (*v++) * x[*vj++]; } ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); PetscFunctionReturn(0); } /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ #undef __FUNCT__ #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat B,Mat A,IS perm,const MatFactorInfo *info) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; PetscErrorCode ierr; const PetscInt *rip,mbs = a->mbs,*ai,*aj; PetscInt *jutmp,bs = A->rmap->bs,bs2=a->bs2,i; PetscInt m,reallocs = 0,*levtmp; PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; PetscInt incrlev,*lev,shift,prow,nz; PetscReal f = info->fill,levels = info->levels; PetscBool perm_identity; PetscFunctionBegin; /* check whether perm is the identity mapping */ ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); if (perm_identity) { a->permute = PETSC_FALSE; ai = a->i; aj = a->j; } else { /* non-trivial permutation */ SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format"); a->permute = PETSC_TRUE; ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); ai = a->inew; aj = a->jnew; } /* initialization */ ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); umax = (PetscInt)(f*ai[mbs] + 1); ierr = PetscMalloc1(umax,&lev);CHKERRQ(ierr); umax += mbs + 1; shift = mbs + 1; ierr = PetscMalloc1(mbs+1,&iu);CHKERRQ(ierr); ierr = PetscMalloc1(umax,&ju);CHKERRQ(ierr); iu[0] = mbs + 1; juidx = mbs + 1; /* prowl: linked list for pivot row */ ierr = PetscMalloc3(mbs,&prowl,mbs,&q,mbs,&levtmp);CHKERRQ(ierr); /* q: linked list for col index */ for (i=0; i k) { qm = k; do { m = qm; qm = q[m]; } while (qm < vj); if (qm == vj) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Duplicate entry in A\n"); nzk++; q[m] = vj; q[vj] = qm; levtmp[vj] = 0; /* initialize lev for nonzero element */ } } /* modify nonzero structure of k-th row by computing fill-in for each row prow to be merged in */ prow = k; prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ while (prow < k) { /* merge row prow into k-th row */ jmin = iu[prow] + 1; jmax = iu[prow+1]; qm = k; for (j=jmin; j levels) continue; vj = ju[j]; do { m = qm; qm = q[m]; } while (qm < vj); if (qm != vj) { /* a fill */ nzk++; q[m] = vj; q[vj] = qm; qm = vj; levtmp[vj] = incrlev; } else if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; } prow = prowl[prow]; /* next pivot row */ } /* add k to row list for first nonzero element in k-th row */ if (nzk > 1) { i = q[k]; /* col value of first nonzero element in k_th row of U */ prowl[k] = prowl[i]; prowl[i] = k; } iu[k+1] = iu[k] + nzk; /* allocate more space to ju and lev if needed */ if (iu[k+1] > umax) { /* estimate how much additional space we will need */ /* use the strategy suggested by David Hysom */ /* just double the memory each time */ maxadd = umax; if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; umax += maxadd; /* allocate a longer ju */ ierr = PetscMalloc1(umax,&jutmp);CHKERRQ(ierr); ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscFree(ju);CHKERRQ(ierr); ju = jutmp; ierr = PetscMalloc1(umax,&jutmp);CHKERRQ(ierr); ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscFree(lev);CHKERRQ(ierr); lev = jutmp; reallocs += 2; /* count how many times we realloc */ } /* save nonzero structure of k-th row in ju */ i=k; while (nzk--) { i = q[i]; ju[juidx] = i; lev[juidx-shift] = levtmp[i]; juidx++; } } #if defined(PETSC_USE_INFO) if (ai[mbs] != 0) { PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); } else { ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); } #endif ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); ierr = PetscFree3(prowl,q,levtmp);CHKERRQ(ierr); ierr = PetscFree(lev);CHKERRQ(ierr); /* put together the new matrix */ ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,NULL);CHKERRQ(ierr); /* ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)iperm);CHKERRQ(ierr); */ b = (Mat_SeqSBAIJ*)(B)->data; ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); b->singlemalloc = PETSC_FALSE; b->free_a = PETSC_TRUE; b->free_ij = PETSC_TRUE; /* the next line frees the default space generated by the Create() */ ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); ierr = PetscMalloc1((iu[mbs]+1)*bs2,&b->a);CHKERRQ(ierr); b->j = ju; b->i = iu; b->diag = 0; b->ilen = 0; b->imax = 0; ierr = ISDestroy(&b->row);CHKERRQ(ierr); ierr = ISDestroy(&b->icol);CHKERRQ(ierr); b->row = perm; b->icol = perm; ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); ierr = PetscMalloc1(bs*mbs+bs,&b->solve_work);CHKERRQ(ierr); /* In b structure: Free imax, ilen, old a, old j. Allocate idnew, solve_work, new a, new j */ ierr = PetscLogObjectMemory((PetscObject)B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); b->maxnz = b->nz = iu[mbs]; (B)->info.factor_mallocs = reallocs; (B)->info.fill_ratio_given = f; if (ai[mbs] != 0) { (B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); } else { (B)->info.fill_ratio_needed = 0.0; } ierr = MatSeqSBAIJSetNumericFactorization_inplace(B,perm_identity);CHKERRQ(ierr); PetscFunctionReturn(0); } /* See MatICCFactorSymbolic_SeqAIJ() for description of its data structure */ #include #include <../src/mat/utils/freespace.h> #undef __FUNCT__ #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; PetscErrorCode ierr; PetscBool perm_identity,free_ij = PETSC_TRUE,missing; PetscInt bs=A->rmap->bs,am=a->mbs,d,*ai=a->i,*aj= a->j; const PetscInt *rip; PetscInt reallocs=0,i,*ui,*udiag,*cols; PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; PetscInt nlnk,*lnk,*lnk_lvl=NULL,ncols,*uj,**uj_ptr,**uj_lvl_ptr; PetscReal fill =info->fill,levels=info->levels; PetscFreeSpaceList free_space =NULL,current_space=NULL; PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; PetscBT lnkbt; PetscFunctionBegin; if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); if (bs > 1) { ierr = MatICCFactorSymbolic_SeqSBAIJ_inplace(fact,A,perm,info);CHKERRQ(ierr); PetscFunctionReturn(0); } /* check whether perm is the identity mapping */ ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); if (!perm_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format"); a->permute = PETSC_FALSE; ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); ui[0] = 0; /* ICC(0) without matrix ordering: simply rearrange column indices */ if (!levels) { /* reuse the column pointers and row offsets for memory savings */ for (i=0; ij + ai[i] + 1; /* 1st entry of U(i,:) without diagonal */ ncols = ai[i+1] - ai[i] -1; for (j=0; j0 */ ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); /* initialization */ /* jl: linked list for storing indices of the pivot rows il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&il,am,&jl);CHKERRQ(ierr); for (i=0; ilocal_remainingarray,current_space_lvl->array,lnkbt);CHKERRQ(ierr); /* add the k-th row into il and jl */ if (nzk > 1) { i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ jl[k] = jl[i]; jl[i] = k; il[k] = ui[k] + 1; } uj_ptr[k] = current_space->array; uj_lvl_ptr[k] = current_space_lvl->array; current_space->array += nzk; current_space->local_used += nzk; current_space->local_remaining -= nzk; current_space_lvl->array += nzk; current_space_lvl->local_used += nzk; current_space_lvl->local_remaining -= nzk; ui[k+1] = ui[k] + nzk; } ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); ierr = PetscFree4(uj_ptr,uj_lvl_ptr,il,jl);CHKERRQ(ierr); /* destroy list of free space and other temporary array(s) */ ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); } /* end of case: levels>0 || (levels=0 && !perm_identity) */ /* put together the new matrix in MATSEQSBAIJ format */ ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); b = (Mat_SeqSBAIJ*)(fact)->data; ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); b->singlemalloc = PETSC_FALSE; b->free_a = PETSC_TRUE; b->free_ij = free_ij; ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); b->j = uj; b->i = ui; b->diag = udiag; b->free_diag = PETSC_TRUE; b->ilen = 0; b->imax = 0; b->row = perm; b->col = perm; ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); b->maxnz = b->nz = ui[am]; fact->info.factor_mallocs = reallocs; fact->info.fill_ratio_given = fill; if (ai[am] != 0) { fact->info.fill_ratio_needed = ((PetscReal)ui[am])/ai[am]; } else { fact->info.fill_ratio_needed = 0.0; } #if defined(PETSC_USE_INFO) if (ai[am] != 0) { PetscReal af = fact->info.fill_ratio_needed; ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); } else { ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); } #endif fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_inplace" PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; Mat_SeqSBAIJ *b; PetscErrorCode ierr; PetscBool perm_identity,free_ij = PETSC_TRUE; PetscInt bs=A->rmap->bs,am=a->mbs; const PetscInt *cols,*rip,*ai=a->i,*aj=a->j; PetscInt reallocs=0,i,*ui; PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; PetscInt nlnk,*lnk,*lnk_lvl=NULL,ncols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; PetscReal fill =info->fill,levels=info->levels,ratio_needed; PetscFreeSpaceList free_space =NULL,current_space=NULL; PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; PetscBT lnkbt; PetscFunctionBegin; /* This code originally uses Modified Sparse Row (MSR) storage (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! Then it is rewritten so the factor B takes seqsbaij format. However the associated MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, thus the original code in MSR format is still used for these cases. The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. */ if (bs > 1) { ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr); PetscFunctionReturn(0); } /* check whether perm is the identity mapping */ ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); if (!perm_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format"); a->permute = PETSC_FALSE; /* special case that simply copies fill pattern */ if (!levels) { /* reuse the column pointers and row offsets for memory savings */ ui = a->i; uj = a->j; free_ij = PETSC_FALSE; ratio_needed = 1.0; } else { /* case: levels>0 */ ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); /* initialization */ ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); ui[0] = 0; /* jl: linked list for storing indices of the pivot rows il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&il,am,&jl);CHKERRQ(ierr); for (i=0; ilocal_remainingarray,current_space_lvl->array,lnkbt);CHKERRQ(ierr); /* add the k-th row into il and jl */ if (nzk-1 > 0) { i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ jl[k] = jl[i]; jl[i] = k; il[k] = ui[k] + 1; } uj_ptr[k] = current_space->array; uj_lvl_ptr[k] = current_space_lvl->array; current_space->array += nzk; current_space->local_used += nzk; current_space->local_remaining -= nzk; current_space_lvl->array += nzk; current_space_lvl->local_used += nzk; current_space_lvl->local_remaining -= nzk; ui[k+1] = ui[k] + nzk; } ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); ierr = PetscFree4(uj_ptr,uj_lvl_ptr,il,jl);CHKERRQ(ierr); /* destroy list of free space and other temporary array(s) */ ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); if (ai[am] != 0) { ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); } else { ratio_needed = 0.0; } } /* end of case: levels>0 || (levels=0 && !perm_identity) */ /* put together the new matrix in MATSEQSBAIJ format */ ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); b = (Mat_SeqSBAIJ*)(fact)->data; ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); b->singlemalloc = PETSC_FALSE; b->free_a = PETSC_TRUE; b->free_ij = free_ij; ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); b->j = uj; b->i = ui; b->diag = 0; b->ilen = 0; b->imax = 0; b->row = perm; b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); b->icol = perm; ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); b->maxnz = b->nz = ui[am]; fact->info.factor_mallocs = reallocs; fact->info.fill_ratio_given = fill; fact->info.fill_ratio_needed = ratio_needed; #if defined(PETSC_USE_INFO) if (ai[am] != 0) { PetscReal af = fact->info.fill_ratio_needed; ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); } else { ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); } #endif if (perm_identity) { fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace; } else { fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace; } PetscFunctionReturn(0); }