/* Factorization code for BAIJ format. */ #include <../src/mat/impls/baij/seq/baij.h> #include /* ------------------------------------------------------------*/ /* Version for when blocks are 6 by 6 */ #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_6_inplace" PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_inplace(Mat C,Mat A,const MatFactorInfo *info) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data; IS isrow = b->row,isicol = b->icol; PetscErrorCode ierr; const PetscInt *ajtmpold,*ajtmp,*diag_offset = b->diag,*r,*ic,*bi = b->i,*bj = b->j,*ai=a->i,*aj=a->j,*pj; PetscInt nz,row,i,j,n = a->mbs,idx; MatScalar *pv,*v,*rtmp,*pc,*w,*x; MatScalar p1,p2,p3,p4,m1,m2,m3,m4,m5,m6,m7,m8,m9,x1,x2,x3,x4; MatScalar p5,p6,p7,p8,p9,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16; MatScalar x17,x18,x19,x20,x21,x22,x23,x24,x25,p10,p11,p12,p13,p14; MatScalar p15,p16,p17,p18,p19,p20,p21,p22,p23,p24,p25,m10,m11,m12; MatScalar m13,m14,m15,m16,m17,m18,m19,m20,m21,m22,m23,m24,m25; MatScalar p26,p27,p28,p29,p30,p31,p32,p33,p34,p35,p36; MatScalar x26,x27,x28,x29,x30,x31,x32,x33,x34,x35,x36; MatScalar m26,m27,m28,m29,m30,m31,m32,m33,m34,m35,m36; MatScalar *ba = b->a,*aa = a->a; PetscReal shift = info->shiftamount; PetscFunctionBegin; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); ierr = PetscMalloc1(36*(n+1),&rtmp);CHKERRQ(ierr); for (i=0; ia */ pv = ba + 36*bi[i]; pj = bj + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; jops->solve = MatSolve_SeqBAIJ_6_inplace; C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6_inplace; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*6*6*6*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_6" PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6(Mat B,Mat A,const MatFactorInfo *info) { Mat C = B; Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; IS isrow = b->row,isicol = b->icol; PetscErrorCode ierr; const PetscInt *r,*ic; PetscInt i,j,k,nz,nzL,row; const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; const PetscInt *ajtmp,*bjtmp,*bdiag=b->diag,*pj,bs2=a->bs2; MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a; PetscInt flg; PetscReal shift = info->shiftamount; PetscFunctionBegin; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); /* generate work space needed by the factorization */ ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); for (i=0; ia + bs2*bdiag[row]; /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ ierr = PetscKernel_A_gets_A_times_B_6(pc,pv,mwork);CHKERRQ(ierr); pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ pv = b->a + bs2*(bdiag[row+1]+1); nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ for (j=0; ja */ /* L part */ pv = b->a + bs2*bi[i]; pj = b->j + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; ja + bs2*bdiag[i]; pj = b->j + bdiag[i]; ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); /* ierr = PetscKernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); */ ierr = PetscKernel_A_gets_inverse_A_6(pv,shift);CHKERRQ(ierr); /* U part */ pv = b->a + bs2*(bdiag[i+1]+1); pj = b->j + bdiag[i+1]+1; nz = bdiag[i] - bdiag[i+1] - 1; for (j=0; jops->solve = MatSolve_SeqBAIJ_6; C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*6*6*6*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace" PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data; PetscErrorCode ierr; PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; PetscInt *ajtmpold,*ajtmp,nz,row; PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; MatScalar *pv,*v,*rtmp,*pc,*w,*x; MatScalar x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15; MatScalar x16,x17,x18,x19,x20,x21,x22,x23,x24,x25; MatScalar p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15; MatScalar p16,p17,p18,p19,p20,p21,p22,p23,p24,p25; MatScalar m1,m2,m3,m4,m5,m6,m7,m8,m9,m10,m11,m12,m13,m14,m15; MatScalar m16,m17,m18,m19,m20,m21,m22,m23,m24,m25; MatScalar p26,p27,p28,p29,p30,p31,p32,p33,p34,p35,p36; MatScalar x26,x27,x28,x29,x30,x31,x32,x33,x34,x35,x36; MatScalar m26,m27,m28,m29,m30,m31,m32,m33,m34,m35,m36; MatScalar *ba = b->a,*aa = a->a; PetscReal shift = info->shiftamount; PetscFunctionBegin; ierr = PetscMalloc1(36*(n+1),&rtmp);CHKERRQ(ierr); for (i=0; ia */ pv = ba + 36*bi[i]; pj = bj + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; jops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering_inplace; C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering_inplace; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*6*6*6*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering" PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info) { Mat C =B; Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; PetscErrorCode ierr; PetscInt i,j,k,nz,nzL,row; const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; const PetscInt *ajtmp,*bjtmp,*bdiag=b->diag,*pj,bs2=a->bs2; MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a; PetscInt flg; PetscReal shift = info->shiftamount; PetscFunctionBegin; /* generate work space needed by the factorization */ ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); for (i=0; ia + bs2*bdiag[row]; /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ ierr = PetscKernel_A_gets_A_times_B_6(pc,pv,mwork);CHKERRQ(ierr); pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ pv = b->a + bs2*(bdiag[row+1]+1); nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ for (j=0; ja */ /* L part */ pv = b->a + bs2*bi[i]; pj = b->j + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; ja + bs2*bdiag[i]; pj = b->j + bdiag[i]; ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); /* ierr = PetscKernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); */ ierr = PetscKernel_A_gets_inverse_A_6(pv,shift);CHKERRQ(ierr); /* U part */ pv = b->a + bs2*(bdiag[i+1]+1); pj = b->j + bdiag[i+1]+1; nz = bdiag[i] - bdiag[i+1] - 1; for (j=0; jops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*6*6*6*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); }