1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for BAIJ format. 5 */ 6 #include "../src/mat/impls/baij/seq/baij.h" 7 #include "../src/mat/blockinvert.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatSeqBAIJSetNumericFactorization" 11 /* 12 This is used to set the numeric factorization for both LU and ILU symbolic factorization 13 */ 14 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural) 15 { 16 PetscFunctionBegin; 17 if (natural) { 18 switch (inA->rmap->bs) { 19 case 1: 20 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 21 break; 22 case 2: 23 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 24 break; 25 case 3: 26 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 27 break; 28 case 4: 29 #if defined(PETSC_USE_MAT_SINGLE) 30 { 31 PetscTruth sse_enabled_local; 32 PetscErrorCode ierr; 33 ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr); 34 if (sse_enabled_local) { 35 # if defined(PETSC_HAVE_SSE) 36 int i,*AJ=a->j,nz=a->nz,n=a->mbs; 37 if (n==(unsigned short)n) { 38 unsigned short *aj=(unsigned short *)AJ; 39 for (i=0;i<nz;i++) { 40 aj[i] = (unsigned short)AJ[i]; 41 } 42 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 43 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 44 ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr); 45 } else { 46 /* Scale the column indices for easier indexing in MatSolve. */ 47 /* for (i=0;i<nz;i++) { */ 48 /* AJ[i] = AJ[i]*4; */ 49 /* } */ 50 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 51 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 52 ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr); 53 } 54 # else 55 /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 56 SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable"); 57 # endif 58 } else { 59 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 60 } 61 } 62 #else 63 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 64 #endif 65 break; 66 case 5: 67 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 68 break; 69 case 6: 70 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 71 break; 72 case 7: 73 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 74 break; 75 default: 76 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 77 break; 78 } 79 } else { 80 switch (inA->rmap->bs) { 81 case 1: 82 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 83 break; 84 case 2: 85 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 86 break; 87 case 3: 88 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 89 break; 90 case 4: 91 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 92 break; 93 case 5: 94 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 95 break; 96 case 6: 97 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 98 break; 99 case 7: 100 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 101 break; 102 default: 103 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 104 break; 105 } 106 } 107 PetscFunctionReturn(0); 108 } 109 110 /* 111 The symbolic factorization code is identical to that for AIJ format, 112 except for very small changes since this is now a SeqBAIJ datastructure. 113 NOT good code reuse. 114 */ 115 #include "petscbt.h" 116 #include "../src/mat/utils/freespace.h" 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ" 120 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 121 { 122 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 123 PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2; 124 PetscTruth row_identity,col_identity,both_identity; 125 IS isicol; 126 PetscErrorCode ierr; 127 const PetscInt *r,*ic; 128 PetscInt i,*ai=a->i,*aj=a->j; 129 PetscInt *bi,*bj,*ajtmp; 130 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 131 PetscReal f; 132 PetscInt nlnk,*lnk,k,**bi_ptr; 133 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 134 PetscBT lnkbt; 135 136 PetscFunctionBegin; 137 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 138 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 139 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 140 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 141 142 /* get new row pointers */ 143 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 144 bi[0] = 0; 145 146 /* bdiag is location of diagonal in factor */ 147 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 148 bdiag[0] = 0; 149 150 /* linked list for storing column indices of the active row */ 151 nlnk = n + 1; 152 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 153 154 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 155 156 /* initial FreeSpace size is f*(ai[n]+1) */ 157 f = info->fill; 158 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 159 current_space = free_space; 160 161 for (i=0; i<n; i++) { 162 /* copy previous fill into linked list */ 163 nzi = 0; 164 nnz = ai[r[i]+1] - ai[r[i]]; 165 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 166 ajtmp = aj + ai[r[i]]; 167 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 168 nzi += nlnk; 169 170 /* add pivot rows into linked list */ 171 row = lnk[n]; 172 while (row < i) { 173 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 174 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 175 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 176 nzi += nlnk; 177 row = lnk[row]; 178 } 179 bi[i+1] = bi[i] + nzi; 180 im[i] = nzi; 181 182 /* mark bdiag */ 183 nzbd = 0; 184 nnz = nzi; 185 k = lnk[n]; 186 while (nnz-- && k < i){ 187 nzbd++; 188 k = lnk[k]; 189 } 190 bdiag[i] = bi[i] + nzbd; 191 192 /* if free space is not available, make more free space */ 193 if (current_space->local_remaining<nzi) { 194 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 195 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 196 reallocs++; 197 } 198 199 /* copy data into free space, then initialize lnk */ 200 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 201 bi_ptr[i] = current_space->array; 202 current_space->array += nzi; 203 current_space->local_used += nzi; 204 current_space->local_remaining -= nzi; 205 } 206 #if defined(PETSC_USE_INFO) 207 if (ai[n] != 0) { 208 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 209 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 210 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 211 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 212 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 213 } else { 214 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 215 } 216 #endif 217 218 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 219 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 220 221 /* destroy list of free space and other temporary array(s) */ 222 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 223 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 224 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 225 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 226 227 /* put together the new matrix */ 228 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 229 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 230 b = (Mat_SeqBAIJ*)(B)->data; 231 b->free_a = PETSC_TRUE; 232 b->free_ij = PETSC_TRUE; 233 b->singlemalloc = PETSC_FALSE; 234 ierr = PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 235 b->j = bj; 236 b->i = bi; 237 b->diag = bdiag; 238 b->ilen = 0; 239 b->imax = 0; 240 b->row = isrow; 241 b->col = iscol; 242 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 243 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 244 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 245 b->icol = isicol; 246 ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 247 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 248 249 b->maxnz = b->nz = bi[n] ; 250 (B)->factor = MAT_FACTOR_LU; 251 (B)->info.factor_mallocs = reallocs; 252 (B)->info.fill_ratio_given = f; 253 254 if (ai[n] != 0) { 255 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 256 } else { 257 (B)->info.fill_ratio_needed = 0.0; 258 } 259 260 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 261 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 262 both_identity = (PetscTruth) (row_identity && col_identity); 263 ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); 264 PetscFunctionReturn(0); 265 } 266 267