1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for BAIJ format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" 7 #include "src/inline/ilu.h" 8 9 /* ----------------------------------------------------------- */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_N" 12 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N(Mat A,MatFactorInfo *info,Mat *B) 13 { 14 Mat C = *B; 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 16 IS isrow = b->row,isicol = b->icol; 17 PetscErrorCode ierr; 18 PetscInt *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 19 PetscInt *ajtmpold,*ajtmp,nz,row,bslog,*ai=a->i,*aj=a->j,k,flg; 20 PetscInt *diag_offset=b->diag,diag,bs=A->rmap.bs,bs2 = a->bs2,*pj,*v_pivots; 21 MatScalar *ba = b->a,*aa = a->a,*pv,*v,*rtmp,*multiplier,*v_work,*pc,*w; 22 23 PetscFunctionBegin; 24 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 25 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 26 ierr = PetscMalloc(bs2*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 27 ierr = PetscMemzero(rtmp,bs2*(n+1)*sizeof(MatScalar));CHKERRQ(ierr); 28 /* generate work space needed by dense LU factorization */ 29 ierr = PetscMalloc(bs*sizeof(PetscInt) + (bs+bs2)*sizeof(MatScalar),&v_work);CHKERRQ(ierr); 30 multiplier = v_work + bs; 31 v_pivots = (PetscInt*)(multiplier + bs2); 32 33 /* flops in while loop */ 34 bslog = 2*bs*bs2; 35 36 for (i=0; i<n; i++) { 37 nz = bi[i+1] - bi[i]; 38 ajtmp = bj + bi[i]; 39 for (j=0; j<nz; j++) { 40 ierr = PetscMemzero(rtmp+bs2*ajtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 41 } 42 /* load in initial (unfactored row) */ 43 nz = ai[r[i]+1] - ai[r[i]]; 44 ajtmpold = aj + ai[r[i]]; 45 v = aa + bs2*ai[r[i]]; 46 for (j=0; j<nz; j++) { 47 ierr = PetscMemcpy(rtmp+bs2*ic[ajtmpold[j]],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 48 } 49 row = *ajtmp++; 50 while (row < i) { 51 pc = rtmp + bs2*row; 52 /* if (*pc) { */ 53 for (flg=0,k=0; k<bs2; k++) { if (pc[k]!=0.0) { flg = 1; break; }} 54 if (flg) { 55 pv = ba + bs2*diag_offset[row]; 56 pj = bj + diag_offset[row] + 1; 57 Kernel_A_gets_A_times_B(bs,pc,pv,multiplier); 58 nz = bi[row+1] - diag_offset[row] - 1; 59 pv += bs2; 60 for (j=0; j<nz; j++) { 61 Kernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); 62 } 63 ierr = PetscLogFlops(bslog*(nz+1)-bs);CHKERRQ(ierr); 64 } 65 row = *ajtmp++; 66 } 67 /* finished row so stick it into b->a */ 68 pv = ba + bs2*bi[i]; 69 pj = bj + bi[i]; 70 nz = bi[i+1] - bi[i]; 71 for (j=0; j<nz; j++) { 72 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 73 } 74 diag = diag_offset[i] - bi[i]; 75 /* invert diagonal block */ 76 w = pv + bs2*diag; 77 ierr = Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);CHKERRQ(ierr); 78 } 79 80 ierr = PetscFree(rtmp);CHKERRQ(ierr); 81 ierr = PetscFree(v_work);CHKERRQ(ierr); 82 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 83 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 84 C->factor = MAT_FACTOR_LU; 85 C->assembled = PETSC_TRUE; 86 ierr = PetscLogFlops(1.3333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 87 PetscFunctionReturn(0); 88 } 89