/* Factorization code for BAIJ format. */ #include <../src/mat/impls/baij/seq/baij.h> #include /* Version for when blocks are 3 by 3 */ #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_3_inplace" PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_inplace(Mat C,Mat A,const MatFactorInfo *info) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data; IS isrow = b->row,isicol = b->icol; PetscErrorCode ierr; const PetscInt *r,*ic; PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; PetscInt *ajtmpold,*ajtmp,nz,row,*ai=a->i,*aj=a->j; PetscInt *diag_offset = b->diag,idx,*pj; MatScalar *pv,*v,*rtmp,*pc,*w,*x; MatScalar p1,p2,p3,p4,m1,m2,m3,m4,m5,m6,m7,m8,m9,x1,x2,x3,x4; MatScalar p5,p6,p7,p8,p9,x5,x6,x7,x8,x9; MatScalar *ba = b->a,*aa = a->a; PetscReal shift = info->shiftamount; PetscFunctionBegin; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); ierr = PetscMalloc1(9*(n+1),&rtmp);CHKERRQ(ierr); for (i=0; ia */ pv = ba + 9*bi[i]; pj = bj + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; jops->solve = MatSolve_SeqBAIJ_3_inplace; C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3_inplace; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*3*3*3*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); } /* MatLUFactorNumeric_SeqBAIJ_3 - copied from MatLUFactorNumeric_SeqBAIJ_N_inplace() and manually re-implemented PetscKernel_A_gets_A_times_B() PetscKernel_A_gets_A_minus_B_times_C() PetscKernel_A_gets_inverse_A() */ #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_3" PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3(Mat B,Mat A,const MatFactorInfo *info) { Mat C =B; Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; IS isrow = b->row,isicol = b->icol; PetscErrorCode ierr; const PetscInt *r,*ic; PetscInt i,j,k,nz,nzL,row; const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; const PetscInt *ajtmp,*bjtmp,*bdiag=b->diag,*pj,bs2=a->bs2; MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a; PetscInt flg; PetscReal shift = info->shiftamount; PetscFunctionBegin; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); /* generate work space needed by the factorization */ ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); for (i=0; ia + bs2*bdiag[row]; /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ ierr = PetscKernel_A_gets_A_times_B_3(pc,pv,mwork);CHKERRQ(ierr); pj = b->j + bdiag[row+1] + 1; /* beginning of U(row,:) */ pv = b->a + bs2*(bdiag[row+1]+1); nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:) excluding diag */ for (j=0; ja */ /* L part */ pv = b->a + bs2*bi[i]; pj = b->j + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; ja + bs2*bdiag[i]; pj = b->j + bdiag[i]; ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); /* ierr = PetscKernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); */ ierr = PetscKernel_A_gets_inverse_A_3(pv,shift);CHKERRQ(ierr); /* U part */ pj = b->j + bdiag[i+1] + 1; pv = b->a + bs2*(bdiag[i+1]+1); nz = bdiag[i] - bdiag[i+1] - 1; for (j=0; jops->solve = MatSolve_SeqBAIJ_3; C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*3*3*3*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace" PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data; PetscErrorCode ierr; PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; PetscInt *ajtmpold,*ajtmp,nz,row; PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; MatScalar *pv,*v,*rtmp,*pc,*w,*x; MatScalar p1,p2,p3,p4,m1,m2,m3,m4,m5,m6,m7,m8,m9,x1,x2,x3,x4; MatScalar p5,p6,p7,p8,p9,x5,x6,x7,x8,x9; MatScalar *ba = b->a,*aa = a->a; PetscReal shift = info->shiftamount; PetscFunctionBegin; ierr = PetscMalloc1(9*(n+1),&rtmp);CHKERRQ(ierr); for (i=0; ia */ pv = ba + 9*bi[i]; pj = bj + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; jops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering_inplace; C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering_inplace; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*3*3*3*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); } /* MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering - copied from MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace() */ #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering" PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info) { Mat C =B; Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; PetscErrorCode ierr; PetscInt i,j,k,nz,nzL,row; const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; const PetscInt *ajtmp,*bjtmp,*bdiag=b->diag,*pj,bs2=a->bs2; MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a; PetscInt flg; PetscReal shift = info->shiftamount; PetscFunctionBegin; /* generate work space needed by the factorization */ ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); for (i=0; ia + bs2*bdiag[row]; /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ ierr = PetscKernel_A_gets_A_times_B_3(pc,pv,mwork);CHKERRQ(ierr); pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ pv = b->a + bs2*(bdiag[row+1]+1); nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:) excluding diag */ for (j=0; ja */ /* L part */ pv = b->a + bs2*bi[i]; pj = b->j + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; ja + bs2*bdiag[i]; pj = b->j + bdiag[i]; ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); /* ierr = PetscKernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); */ ierr = PetscKernel_A_gets_inverse_A_3(pv,shift);CHKERRQ(ierr); /* U part */ pv = b->a + bs2*(bdiag[i+1]+1); pj = b->j + bdiag[i+1]+1; nz = bdiag[i] - bdiag[i+1] - 1; for (j=0; jops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; C->ops->forwardsolve = MatForwardSolve_SeqBAIJ_3_NaturalOrdering; C->ops->backwardsolve = MatBackwardSolve_SeqBAIJ_3_NaturalOrdering; C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*3*3*3*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); }