1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for BAIJ format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" 7 #include "src/inline/ilu.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatSeqBAIJSetNumericFactorization" 11 /* 12 This is used to set the numeric factorization for both LU and ILU symbolic factorization 13 */ 14 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural) 15 { 16 PetscFunctionBegin; 17 if (natural) { 18 switch (inA->rmap->bs) { 19 case 1: 20 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 21 break; 22 case 2: 23 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 24 break; 25 case 3: 26 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 27 break; 28 case 4: 29 #if defined(PETSC_USE_MAT_SINGLE) 30 { 31 PetscTruth sse_enabled_local; 32 PetscErrorCode ierr; 33 ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr); 34 if (sse_enabled_local) { 35 # if defined(PETSC_HAVE_SSE) 36 int i,*AJ=a->j,nz=a->nz,n=a->mbs; 37 if (n==(unsigned short)n) { 38 unsigned short *aj=(unsigned short *)AJ; 39 for (i=0;i<nz;i++) { 40 aj[i] = (unsigned short)AJ[i]; 41 } 42 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 43 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 44 ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr); 45 } else { 46 /* Scale the column indices for easier indexing in MatSolve. */ 47 /* for (i=0;i<nz;i++) { */ 48 /* AJ[i] = AJ[i]*4; */ 49 /* } */ 50 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 51 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 52 ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr); 53 } 54 # else 55 /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 56 SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable"); 57 # endif 58 } else { 59 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 60 } 61 } 62 #else 63 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 64 #endif 65 break; 66 case 5: 67 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 68 break; 69 case 6: 70 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 71 break; 72 case 7: 73 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 74 break; 75 default: 76 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 77 break; 78 } 79 } else { 80 switch (inA->rmap->bs) { 81 case 1: 82 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 83 break; 84 case 2: 85 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 86 break; 87 case 3: 88 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 89 break; 90 case 4: 91 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 92 break; 93 case 5: 94 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 95 break; 96 case 6: 97 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 98 break; 99 case 7: 100 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 101 break; 102 default: 103 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 104 break; 105 } 106 } 107 PetscFunctionReturn(0); 108 } 109 110 /* 111 The symbolic factorization code is identical to that for AIJ format, 112 except for very small changes since this is now a SeqBAIJ datastructure. 113 NOT good code reuse. 114 */ 115 #undef __FUNCT__ 116 #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ" 117 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 118 { 119 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 120 IS isicol; 121 PetscErrorCode ierr; 122 PetscInt *r,*ic,i,n = a->mbs,*ai = a->i,*aj = a->j; 123 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz,bs = A->rmap->bs,bs2=a->bs2; 124 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 125 PetscReal f = 1.0; 126 PetscTruth row_identity,col_identity; 127 128 PetscFunctionBegin; 129 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 130 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 131 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 132 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 133 134 f = info->fill; 135 /* get new row pointers */ 136 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 137 ainew[0] = 0; 138 /* don't know how many column pointers are needed so estimate */ 139 jmax = (PetscInt)(f*ai[n] + 1); 140 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 141 /* fill is a linked list of nonzeros in active row */ 142 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 143 im = fill + n + 1; 144 /* idnew is location of diagonal in factor */ 145 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 146 idnew[0] = 0; 147 148 for (i=0; i<n; i++) { 149 /* first copy previous fill into linked list */ 150 nnz = nz = ai[r[i]+1] - ai[r[i]]; 151 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 152 ajtmp = aj + ai[r[i]]; 153 fill[n] = n; 154 while (nz--) { 155 fm = n; 156 idx = ic[*ajtmp++]; 157 do { 158 m = fm; 159 fm = fill[m]; 160 } while (fm < idx); 161 fill[m] = idx; 162 fill[idx] = fm; 163 } 164 row = fill[n]; 165 while (row < i) { 166 ajtmp = ajnew + idnew[row] + 1; 167 nzbd = 1 + idnew[row] - ainew[row]; 168 nz = im[row] - nzbd; 169 fm = row; 170 while (nz-- > 0) { 171 idx = *ajtmp++; 172 nzbd++; 173 if (idx == i) im[row] = nzbd; 174 do { 175 m = fm; 176 fm = fill[m]; 177 } while (fm < idx); 178 if (fm != idx) { 179 fill[m] = idx; 180 fill[idx] = fm; 181 fm = idx; 182 nnz++; 183 } 184 } 185 row = fill[row]; 186 } 187 /* copy new filled row into permanent storage */ 188 ainew[i+1] = ainew[i] + nnz; 189 if (ainew[i+1] > jmax) { 190 191 /* estimate how much additional space we will need */ 192 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 193 /* just double the memory each time */ 194 PetscInt maxadd = jmax; 195 /* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */ 196 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 197 jmax += maxadd; 198 199 /* allocate a longer ajnew */ 200 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 201 ierr = PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(PetscInt));CHKERRQ(ierr); 202 ierr = PetscFree(ajnew);CHKERRQ(ierr); 203 ajnew = ajtmp; 204 reallocs++; /* count how many times we realloc */ 205 } 206 ajtmp = ajnew + ainew[i]; 207 fm = fill[n]; 208 nzi = 0; 209 im[i] = nnz; 210 while (nnz--) { 211 if (fm < i) nzi++; 212 *ajtmp++ = fm; 213 fm = fill[fm]; 214 } 215 idnew[i] = ainew[i] + nzi; 216 } 217 218 #if defined(PETSC_USE_INFO) 219 if (ai[n] != 0) { 220 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 221 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 222 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 223 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 224 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 225 } else { 226 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 227 } 228 #endif 229 230 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 231 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 232 233 ierr = PetscFree(fill);CHKERRQ(ierr); 234 235 /* put together the new matrix */ 236 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 237 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 238 b = (Mat_SeqBAIJ*)(*B)->data; 239 b->singlemalloc = PETSC_FALSE; 240 b->free_a = PETSC_TRUE; 241 b->free_ij = PETSC_TRUE; 242 ierr = PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 243 b->j = ajnew; 244 b->i = ainew; 245 b->diag = idnew; 246 b->ilen = 0; 247 b->imax = 0; 248 b->row = isrow; 249 b->col = iscol; 250 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 251 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 252 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 253 b->icol = isicol; 254 ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 255 /* In b structure: Free imax, ilen, old a, old j. 256 Allocate idnew, solve_work, new a, new j */ 257 ierr = PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 258 b->maxnz = b->nz = ainew[n]; 259 260 (*B)->factor = MAT_FACTOR_LU; 261 (*B)->info.factor_mallocs = reallocs; 262 (*B)->info.fill_ratio_given = f; 263 if (ai[n] != 0) { 264 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 265 } else { 266 (*B)->info.fill_ratio_needed = 0.0; 267 } 268 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 269 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 270 ierr = MatSeqBAIJSetNumericFactorization(*B,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 271 PetscFunctionReturn(0); 272 } 273 274