/* Factorization code for BAIJ format. */ #include <../src/mat/impls/baij/seq/baij.h> #include #include #include <../src/mat/utils/freespace.h> /* ----------------------------------------------------------------*/ extern PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat,Mat,MatDuplicateOption,PetscBool); #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering" /* This is not much faster than MatLUFactorNumeric_SeqBAIJ_N() but the solve is faster at least sometimes */ PetscErrorCode MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info) { Mat C =B; Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; PetscErrorCode ierr; PetscInt i,j,k,ipvt[15]; const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ajtmp,*bjtmp,*bdiag=b->diag,*pj; PetscInt nz,nzL,row; MatScalar *rtmp,*pc,*mwork,*pv,*vv,work[225]; const MatScalar *v,*aa=a->a; PetscInt bs2 = a->bs2,bs=A->rmap->bs,flg; PetscInt sol_ver; PetscFunctionBegin; ierr = PetscOptionsGetInt(((PetscObject)A)->prefix,"-sol_ver",&sol_ver,NULL);CHKERRQ(ierr); /* generate work space needed by the factorization */ ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); for (i=0; ia + bs2*bdiag[row]; PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); /*ierr = PetscKernel_A_gets_A_times_B_15(pc,pv,mwork);CHKERRQ(ierr);*/ pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ pv = b->a + bs2*(bdiag[row+1]+1); nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ for (j=0; ja */ /* L part */ pv = b->a + bs2*bi[i]; pj = b->j + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; ja + bs2*bdiag[i]; pj = b->j + bdiag[i]; ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); /* PetscKernel_A_gets_inverse_A(bs,pv,pivots,work); */ ierr = PetscKernel_A_gets_inverse_A_15(pv,ipvt,work,info->shiftamount);CHKERRQ(ierr); /* U part */ pv = b->a + bs2*(bdiag[i+1]+1); pj = b->j + bdiag[i+1]+1; nz = bdiag[i] - bdiag[i+1] - 1; for (j=0; jops->solve = MatSolve_SeqBAIJ_15_NaturalOrdering_ver1; C->ops->solvetranspose = MatSolve_SeqBAIJ_N_NaturalOrdering; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_N" PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N(Mat B,Mat A,const MatFactorInfo *info) { Mat C =B; Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; IS isrow = b->row,isicol = b->icol; PetscErrorCode ierr; const PetscInt *r,*ic; PetscInt i,j,k,n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a; PetscInt bs=A->rmap->bs,bs2 = a->bs2,*v_pivots,flg; MatScalar *v_work; PetscBool col_identity,row_identity,both_identity; PetscFunctionBegin; ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); ierr = PetscMalloc1(bs2*n,&rtmp);CHKERRQ(ierr); ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); /* generate work space needed by dense LU factorization */ ierr = PetscMalloc3(bs,&v_work,bs2,&mwork,bs,&v_pivots);CHKERRQ(ierr); for (i=0; ia + bs2*bdiag[row]; PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); /* *pc = *pc * (*pv); */ pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ pv = b->a + bs2*(bdiag[row+1]+1); nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ for (j=0; ja */ /* L part */ pv = b->a + bs2*bi[i]; pj = b->j + bi[i]; nz = bi[i+1] - bi[i]; for (j=0; ja + bs2*bdiag[i]; pj = b->j + bdiag[i]; /* if (*pj != i)SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"row %d != *pj %d",i,*pj); */ ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); ierr = PetscKernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); /* U part */ pv = b->a + bs2*(bdiag[i+1]+1); pj = b->j + bdiag[i+1]+1; nz = bdiag[i] - bdiag[i+1] - 1; for (j=0; jops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering; } else { C->ops->solve = MatSolve_SeqBAIJ_N; } C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_N; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); } /* ilu(0) with natural ordering under new data structure. See MatILUFactorSymbolic_SeqAIJ_ilu0() for detailed description because this code is almost identical to MatILUFactorSymbolic_SeqAIJ_ilu0_inplace(). */ #undef __FUNCT__ #define __FUNCT__ "MatILUFactorSymbolic_SeqBAIJ_ilu0" PetscErrorCode MatILUFactorSymbolic_SeqBAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; PetscErrorCode ierr; PetscInt n=a->mbs,*ai=a->i,*aj,*adiag=a->diag,bs2 = a->bs2; PetscInt i,j,nz,*bi,*bj,*bdiag,bi_temp; PetscFunctionBegin; ierr = MatDuplicateNoCreate_SeqBAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); b = (Mat_SeqBAIJ*)(fact)->data; /* allocate matrix arrays for new data structure */ ierr = PetscMalloc3(bs2*ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); b->singlemalloc = PETSC_TRUE; b->free_a = PETSC_TRUE; b->free_ij = PETSC_TRUE; fact->preallocated = PETSC_TRUE; fact->assembled = PETSC_TRUE; if (!b->diag) { ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); } bdiag = b->diag; if (n > 0) { ierr = PetscMemzero(b->a,bs2*ai[n]*sizeof(MatScalar));CHKERRQ(ierr); } /* set bi and bj with new data structure */ bi = b->i; bj = b->j; /* L part */ bi[0] = 0; for (i=0; ij + ai[i]; for (j=0; j=0; i--) { nz = ai[i+1] - adiag[i] - 1; bi_temp = bi_temp + nz + 1; aj = a->j + adiag[i] + 1; for (j=0; jdata,*b; IS isicol; PetscErrorCode ierr; const PetscInt *r,*ic; PetscInt n=a->mbs,*ai=a->i,*aj=a->j,d; PetscInt *bi,*cols,nnz,*cols_lvl; PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; PetscInt i,levels,diagonal_fill; PetscBool col_identity,row_identity,both_identity; PetscReal f; PetscInt nlnk,*lnk,*lnk_lvl=NULL; PetscBT lnkbt; PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; PetscFreeSpaceList free_space =NULL,current_space=NULL; PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; PetscBool missing; PetscInt bs=A->rmap->bs,bs2=a->bs2; PetscFunctionBegin; if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); if (bs>1) { /* check shifttype */ if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); } ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); f = info->fill; levels = (PetscInt)info->levels; diagonal_fill = (PetscInt)info->diagonal_fill; ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); both_identity = (PetscBool) (row_identity && col_identity); if (!levels && both_identity) { /* special case: ilu(0) with natural ordering */ ierr = MatILUFactorSymbolic_SeqBAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); ierr = MatSeqBAIJSetNumericFactorization(fact,both_identity);CHKERRQ(ierr); fact->factortype = MAT_FACTOR_ILU; (fact)->info.factor_mallocs = 0; (fact)->info.fill_ratio_given = info->fill; (fact)->info.fill_ratio_needed = 1.0; b = (Mat_SeqBAIJ*)(fact)->data; b->row = isrow; b->col = iscol; b->icol = isicol; ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; ierr = PetscMalloc1((n+1)*bs,&b->solve_work);CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); /* get new row pointers */ ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); bi[0] = 0; /* bdiag is location of diagonal in factor */ ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); bdiag[0] = 0; ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); /* create a linked list for storing column indices of the active row */ nlnk = n + 1; ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); /* initial FreeSpace size is f*(ai[n]+1) */ ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); current_space = free_space; ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); current_space_lvl = free_space_lvl; for (i=0; ilocal_remainingarray,current_space_lvl->array,lnkbt);CHKERRQ(ierr); bj_ptr[i] = current_space->array; bjlvl_ptr[i] = current_space_lvl->array; /* make sure the active row i has diagonal entry */ if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); current_space->array += nzi; current_space->local_used += nzi; current_space->local_remaining -= nzi; current_space_lvl->array += nzi; current_space_lvl->local_used += nzi; current_space_lvl->local_remaining -= nzi; } ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); #if defined(PETSC_USE_INFO) { PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); if (diagonal_fill) { ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); } } #endif /* put together the new matrix */ ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); b = (Mat_SeqBAIJ*)(fact)->data; b->free_a = PETSC_TRUE; b->free_ij = PETSC_TRUE; b->singlemalloc = PETSC_FALSE; ierr = PetscMalloc1(bs2*(bdiag[0]+1),&b->a);CHKERRQ(ierr); b->j = bj; b->i = bi; b->diag = bdiag; b->free_diag = PETSC_TRUE; b->ilen = 0; b->imax = 0; b->row = isrow; b->col = iscol; ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); b->icol = isicol; ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr); /* In b structure: Free imax, ilen, old a, old j. Allocate bdiag, solve_work, new a, new j */ ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1) * (sizeof(PetscInt)+bs2*sizeof(PetscScalar)));CHKERRQ(ierr); b->maxnz = b->nz = bdiag[0]+1; fact->info.factor_mallocs = reallocs; fact->info.fill_ratio_given = f; fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); ierr = MatSeqBAIJSetNumericFactorization(fact,both_identity);CHKERRQ(ierr); PetscFunctionReturn(0); } /* This code is virtually identical to MatILUFactorSymbolic_SeqAIJ except that the data structure of Mat_SeqAIJ is slightly different. Not a good example of code reuse. */ #undef __FUNCT__ #define __FUNCT__ "MatILUFactorSymbolic_SeqBAIJ_inplace" PetscErrorCode MatILUFactorSymbolic_SeqBAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; IS isicol; PetscErrorCode ierr; const PetscInt *r,*ic,*ai = a->i,*aj = a->j,*xi; PetscInt prow,n = a->mbs,*ainew,*ajnew,jmax,*fill,nz,*im,*ajfill,*flev,*xitmp; PetscInt *dloc,idx,row,m,fm,nzf,nzi,reallocate = 0,dcount = 0; PetscInt incrlev,nnz,i,bs = A->rmap->bs,bs2 = a->bs2,levels,diagonal_fill,dd; PetscBool col_identity,row_identity,both_identity,flg; PetscReal f; PetscFunctionBegin; ierr = MatMissingDiagonal_SeqBAIJ(A,&flg,&dd);CHKERRQ(ierr); if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix A is missing diagonal entry in row %D",dd); f = info->fill; levels = (PetscInt)info->levels; diagonal_fill = (PetscInt)info->diagonal_fill; ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); both_identity = (PetscBool) (row_identity && col_identity); if (!levels && both_identity) { /* special case copy the nonzero structure */ ierr = MatDuplicateNoCreate_SeqBAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); ierr = MatSeqBAIJSetNumericFactorization_inplace(fact,both_identity);CHKERRQ(ierr); fact->factortype = MAT_FACTOR_ILU; b = (Mat_SeqBAIJ*)fact->data; b->row = isrow; b->col = iscol; ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); b->icol = isicol; b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; ierr = PetscMalloc1((n+1)*bs,&b->solve_work);CHKERRQ(ierr); PetscFunctionReturn(0); } /* general case perform the symbolic factorization */ ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); /* get new row pointers */ ierr = PetscMalloc1(n+1,&ainew);CHKERRQ(ierr); ainew[0] = 0; /* don't know how many column pointers are needed so estimate */ jmax = (PetscInt)(f*ai[n] + 1); ierr = PetscMalloc1(jmax,&ajnew);CHKERRQ(ierr); /* ajfill is level of fill for each fill entry */ ierr = PetscMalloc1(jmax,&ajfill);CHKERRQ(ierr); /* fill is a linked list of nonzeros in active row */ ierr = PetscMalloc1(n+1,&fill);CHKERRQ(ierr); /* im is level for each filled value */ ierr = PetscMalloc1(n+1,&im);CHKERRQ(ierr); /* dloc is location of diagonal in factor */ ierr = PetscMalloc1(n+1,&dloc);CHKERRQ(ierr); dloc[0] = 0; for (prow=0; prow 0) { idx = *xi++; if (*flev + incrlev > levels) { flev++; continue; } do { m = fm; fm = fill[m]; } while (fm < idx); if (fm != idx) { im[idx] = *flev + incrlev; fill[m] = idx; fill[idx] = fm; fm = idx; nzf++; } else if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; flev++; } row = fill[row]; nzi++; } /* copy new filled row into permanent storage */ ainew[prow+1] = ainew[prow] + nzf; if (ainew[prow+1] > jmax) { /* estimate how much additional space we will need */ /* use the strategy suggested by David Hysom */ /* just double the memory each time */ PetscInt maxadd = jmax; /* maxadd = (int)(((f*ai[n]+1)*(n-prow+5))/n); */ if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); jmax += maxadd; /* allocate a longer ajnew and ajfill */ ierr = PetscMalloc1(jmax,&xitmp);CHKERRQ(ierr); ierr = PetscMemcpy(xitmp,ajnew,ainew[prow]*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscFree(ajnew);CHKERRQ(ierr); ajnew = xitmp; ierr = PetscMalloc1(jmax,&xitmp);CHKERRQ(ierr); ierr = PetscMemcpy(xitmp,ajfill,ainew[prow]*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscFree(ajfill);CHKERRQ(ierr); ajfill = xitmp; reallocate++; /* count how many reallocations are needed */ } xitmp = ajnew + ainew[prow]; flev = ajfill + ainew[prow]; dloc[prow] = nzi; fm = fill[n]; while (nzf--) { *xitmp++ = fm; *flev++ = im[fm]; fm = fill[fm]; } /* make sure row has diagonal entry */ if (ajnew[ainew[prow]+dloc[prow]] != prow) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",prow); } ierr = PetscFree(ajfill);CHKERRQ(ierr); ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); ierr = PetscFree(fill);CHKERRQ(ierr); ierr = PetscFree(im);CHKERRQ(ierr); #if defined(PETSC_USE_INFO) { PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocate,(double)f,(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); if (diagonal_fill) { ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); } } #endif /* put together the new matrix */ ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); b = (Mat_SeqBAIJ*)fact->data; b->free_a = PETSC_TRUE; b->free_ij = PETSC_TRUE; b->singlemalloc = PETSC_FALSE; ierr = PetscMalloc1(bs2*ainew[n],&b->a);CHKERRQ(ierr); b->j = ajnew; b->i = ainew; for (i=0; idiag = dloc; b->free_diag = PETSC_TRUE; b->ilen = 0; b->imax = 0; b->row = isrow; b->col = iscol; b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); b->icol = isicol; ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr); /* In b structure: Free imax, ilen, old a, old j. Allocate dloc, solve_work, new a, new j */ ierr = PetscLogObjectMemory((PetscObject)fact,(ainew[n]-n)*(sizeof(PetscInt))+bs2*ainew[n]*sizeof(PetscScalar));CHKERRQ(ierr); b->maxnz = b->nz = ainew[n]; fact->info.factor_mallocs = reallocate; fact->info.fill_ratio_given = f; fact->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); ierr = MatSeqBAIJSetNumericFactorization_inplace(fact,both_identity);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE" PetscErrorCode MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE(Mat A) { /* Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; */ /* int i,*AJ=a->j,nz=a->nz; */ PetscFunctionBegin; /* Undo Column scaling */ /* while (nz--) { */ /* AJ[i] = AJ[i]/4; */ /* } */ /* This should really invoke a push/pop logic, but we don't have that yet. */ A->ops->setunfactored = NULL; PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj" PetscErrorCode MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj(Mat A) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscInt *AJ=a->j,nz=a->nz; unsigned short *aj=(unsigned short*)AJ; PetscFunctionBegin; /* Is this really necessary? */ while (nz--) { AJ[nz] = (int)((unsigned int)aj[nz]); /* First extend, then convert to signed. */ } A->ops->setunfactored = NULL; PetscFunctionReturn(0); }