#include <../src/mat/impls/sbaij/seq/sbaij.h> #include /* Version for when blocks are 4 by 4 */ #undef __FUNCT__ #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_4" PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat C,Mat A,const MatFactorInfo *info) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data; IS perm = b->row; PetscErrorCode ierr; const PetscInt *ai,*aj,*perm_ptr,mbs=a->mbs,*bi=b->i,*bj=b->j; PetscInt i,j,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; MatScalar *ba = b->a,*aa,*ap,*dk,*uik; MatScalar *u,*diag,*rtmp,*rtmp_ptr; PetscBool pivotinblocks = b->pivotinblocks; PetscReal shift = info->shiftamount; PetscFunctionBegin; /* initialization */ ierr = PetscCalloc1(16*mbs,&rtmp);CHKERRQ(ierr); ierr = PetscMalloc2(mbs,&il,mbs,&jl);CHKERRQ(ierr); for (i=0; ipermute) { ai = a->i; aj = a->j; aa = a->a; } else { ai = a->inew; aj = a->jnew; ierr = PetscMalloc1(16*ai[mbs],&aa);CHKERRQ(ierr); ierr = PetscMemcpy(aa,a->a,16*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); ierr = PetscMalloc1(ai[mbs],&a2anew);CHKERRQ(ierr); ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); for (i=0; i aj[j]) { /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ ap = aa + j*16; /* ptr to the beginning of j-th block of aa */ for (k=0; k<16; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ for (k=0; k<4; k++) { /* j-th block of aa <- dk^T */ for (k1=0; k1<4; k1++) *ap++ = dk[k + 4*k1]; } } } } ierr = PetscFree(a2anew);CHKERRQ(ierr); } /* for each row k */ for (k = 0; kpermute) { ierr = PetscFree(aa);CHKERRQ(ierr); } ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); C->ops->solve = MatSolve_SeqSBAIJ_4_inplace; C->ops->solvetranspose = MatSolve_SeqSBAIJ_4_inplace; C->assembled = PETSC_TRUE; C->preallocated = PETSC_TRUE; ierr = PetscLogFlops(1.3333*64*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); }