/* Factorization code for BAIJ format. */ #include <../src/mat/impls/baij/seq/baij.h> #include /* ------------------------------------------------------------*/ /* Version for when blocks are 5 by 5 */ #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_5_inplace" PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_inplace(Mat C,Mat A,const MatFactorInfo *info) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data; IS isrow = b->row,isicol = b->icol; PetscErrorCode ierr; const PetscInt *r,*ic,*bi = b->i,*bj = b->j,*ajtmpold,*ajtmp; PetscInt i,j,n = a->mbs,nz,row,idx,ipvt[5]; const PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; MatScalar *w,*pv,*rtmp,*x,*pc; const MatScalar *v,*aa = a->a; MatScalar p1,p2,p3,p4,m1,m2,m3,m4,m5,m6,m7,m8,m9,x1,x2,x3,x4; MatScalar p5,p6,p7,p8,p9,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16; MatScalar x17,x18,x19,x20,x21,x22,x23,x24,x25,p10,p11,p12,p13,p14; MatScalar p15,p16,p17,p18,p19,p20,p21,p22,p23,p24,p25,m10,m11,m12; MatScalar m13,m14,m15,m16,m17,m18,m19,m20,m21,m22,m23,m24,m25; MatScalar *ba = b->a,work[25]; PetscReal shift = info->shiftamount; PetscFunctionBegin; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); ierr = PetscMalloc1(25*(n+1),&rtmp);CHKERRQ(ierr); #define PETSC_USE_MEMZERO 1 #define PETSC_USE_MEMCPY 1 for (i=0; ia */ pv = ba + 25*bi[i]; pj = bj + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; jops->solve = MatSolve_SeqBAIJ_5_inplace; C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5_inplace; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*5*5*5*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); } /* MatLUFactorNumeric_SeqBAIJ_5 - copied from MatLUFactorNumeric_SeqBAIJ_N_inplace() and manually re-implemented PetscKernel_A_gets_A_times_B() PetscKernel_A_gets_A_minus_B_times_C() PetscKernel_A_gets_inverse_A() */ #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_5" PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5(Mat B,Mat A,const MatFactorInfo *info) { Mat C =B; Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; IS isrow = b->row,isicol = b->icol; PetscErrorCode ierr; const PetscInt *r,*ic; PetscInt i,j,k,nz,nzL,row; const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; const PetscInt *ajtmp,*bjtmp,*bdiag=b->diag,*pj,bs2=a->bs2; MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a,work[25]; PetscInt flg,ipvt[5]; PetscReal shift = info->shiftamount; PetscFunctionBegin; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); /* generate work space needed by the factorization */ ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); for (i=0; ia + bs2*bdiag[row]; /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ ierr = PetscKernel_A_gets_A_times_B_5(pc,pv,mwork);CHKERRQ(ierr); pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ pv = b->a + bs2*(bdiag[row+1]+1); nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ for (j=0; ja */ /* L part */ pv = b->a + bs2*bi[i]; pj = b->j + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; ja + bs2*bdiag[i]; pj = b->j + bdiag[i]; ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); /* ierr = PetscKernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); */ ierr = PetscKernel_A_gets_inverse_A_5(pv,ipvt,work,shift);CHKERRQ(ierr); /* U part */ pv = b->a + bs2*(bdiag[i+1]+1); pj = b->j + bdiag[i+1]+1; nz = bdiag[i] - bdiag[i+1] - 1; for (j=0; jops->solve = MatSolve_SeqBAIJ_5; C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*5*5*5*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); } /* Version for when blocks are 5 by 5 Using natural ordering */ #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace" PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data; PetscErrorCode ierr; PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j,ipvt[5]; PetscInt *ajtmpold,*ajtmp,nz,row; PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; MatScalar *pv,*v,*rtmp,*pc,*w,*x; MatScalar x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15; MatScalar x16,x17,x18,x19,x20,x21,x22,x23,x24,x25; MatScalar p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15; MatScalar p16,p17,p18,p19,p20,p21,p22,p23,p24,p25; MatScalar m1,m2,m3,m4,m5,m6,m7,m8,m9,m10,m11,m12,m13,m14,m15; MatScalar m16,m17,m18,m19,m20,m21,m22,m23,m24,m25; MatScalar *ba = b->a,*aa = a->a,work[25]; PetscReal shift = info->shiftamount; PetscFunctionBegin; ierr = PetscMalloc1(25*(n+1),&rtmp);CHKERRQ(ierr); for (i=0; ia */ pv = ba + 25*bi[i]; pj = bj + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; jops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering_inplace; C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering_inplace; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*5*5*5*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering" PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info) { Mat C =B; Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; PetscErrorCode ierr; PetscInt i,j,k,nz,nzL,row; const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; const PetscInt *ajtmp,*bjtmp,*bdiag=b->diag,*pj,bs2=a->bs2; MatScalar *rtmp,*pc,*mwork,*v,*vv,*pv,*aa=a->a,work[25]; PetscInt flg,ipvt[5]; PetscReal shift = info->shiftamount; PetscFunctionBegin; /* generate work space needed by the factorization */ ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); for (i=0; ia + bs2*bdiag[row]; /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ ierr = PetscKernel_A_gets_A_times_B_5(pc,pv,mwork);CHKERRQ(ierr); pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ pv = b->a + bs2*(bdiag[row+1]+1); nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ for (j=0; ja */ /* L part */ pv = b->a + bs2*bi[i]; pj = b->j + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; ja + bs2*bdiag[i]; pj = b->j + bdiag[i]; ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); /* ierr = PetscKernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); */ ierr = PetscKernel_A_gets_inverse_A_5(pv,ipvt,work,shift);CHKERRQ(ierr); /* U part */ pv = b->a + bs2*(bdiag[i+1]+1); pj = b->j + bdiag[i+1]+1; nz = bdiag[i] - bdiag[i+1] - 1; for (j=0; jops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*5*5*5*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); }