xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision a4efd8ea4bb0600a5aef92b2c23cbfa314827193)
1 #define PETSCMAT_DLL
2 
3 /*
4     Factorization code for BAIJ format.
5 */
6 #include "src/mat/impls/baij/seq/baij.h"
7 #include "src/inline/ilu.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatSeqBAIJSetNumericFactorization"
11 /*
12    This is used to set the numeric factorization for both LU and ILU symbolic factorization
13 */
14 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural)
15 {
16   PetscFunctionBegin;
17   if (natural) {
18     switch (inA->rmap->bs) {
19     case 1:
20       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
21       break;
22     case 2:
23       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
24       break;
25     case 3:
26       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
27       break;
28     case 4:
29 #if defined(PETSC_USE_MAT_SINGLE)
30       {
31         PetscTruth  sse_enabled_local;
32         PetscErrorCode ierr;
33         ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr);
34         if (sse_enabled_local) {
35 #  if defined(PETSC_HAVE_SSE)
36           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
37           if (n==(unsigned short)n) {
38             unsigned short *aj=(unsigned short *)AJ;
39             for (i=0;i<nz;i++) {
40               aj[i] = (unsigned short)AJ[i];
41             }
42             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
43             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
44             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr);
45           } else {
46         /* Scale the column indices for easier indexing in MatSolve. */
47 /*            for (i=0;i<nz;i++) { */
48 /*              AJ[i] = AJ[i]*4; */
49 /*            } */
50             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
51             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
52             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr);
53           }
54 #  else
55         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
56           SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable");
57 #  endif
58         } else {
59           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
60         }
61       }
62 #else
63       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
64 #endif
65       break;
66     case 5:
67       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
68       break;
69     case 6:
70       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
71       break;
72     case 7:
73       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
74       break;
75     default:
76       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
77       break;
78     }
79   } else {
80     switch (inA->rmap->bs) {
81     case 1:
82       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
83       break;
84     case 2:
85       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
86       break;
87     case 3:
88       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
89       break;
90     case 4:
91       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
92       break;
93     case 5:
94       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
95       break;
96     case 6:
97       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
98       break;
99     case 7:
100       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
101       break;
102     default:
103       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
104       break;
105     }
106   }
107   PetscFunctionReturn(0);
108 }
109 
110 /*
111     The symbolic factorization code is identical to that for AIJ format,
112   except for very small changes since this is now a SeqBAIJ datastructure.
113   NOT good code reuse.
114 */
115 #undef __FUNCT__
116 #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ"
117 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
118 {
119   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b;
120   IS             isicol;
121   PetscErrorCode ierr;
122   PetscInt       *r,*ic,i,n = a->mbs,*ai = a->i,*aj = a->j;
123   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz,bs = A->rmap->bs,bs2=a->bs2;
124   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
125   PetscReal      f = 1.0;
126   PetscTruth     row_identity,col_identity;
127 
128   PetscFunctionBegin;
129   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
130   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
131   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
132   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
133 
134   f = info->fill;
135   /* get new row pointers */
136   ierr     = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
137   ainew[0] = 0;
138   /* don't know how many column pointers are needed so estimate */
139   jmax     = (PetscInt)(f*ai[n] + 1);
140   ierr     = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
141   /* fill is a linked list of nonzeros in active row */
142   ierr     = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
143   im       = fill + n + 1;
144   /* idnew is location of diagonal in factor */
145   ierr     = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
146   idnew[0] = 0;
147 
148   for (i=0; i<n; i++) {
149     /* first copy previous fill into linked list */
150     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
151     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
152     ajtmp   = aj + ai[r[i]];
153     fill[n] = n;
154     while (nz--) {
155       fm  = n;
156       idx = ic[*ajtmp++];
157       do {
158         m  = fm;
159         fm = fill[m];
160       } while (fm < idx);
161       fill[m]   = idx;
162       fill[idx] = fm;
163     }
164     row = fill[n];
165     while (row < i) {
166       ajtmp = ajnew + idnew[row] + 1;
167       nzbd  = 1 + idnew[row] - ainew[row];
168       nz    = im[row] - nzbd;
169       fm    = row;
170       while (nz-- > 0) {
171         idx = *ajtmp++;
172         nzbd++;
173         if (idx == i) im[row] = nzbd;
174         do {
175           m  = fm;
176           fm = fill[m];
177         } while (fm < idx);
178         if (fm != idx) {
179           fill[m]   = idx;
180           fill[idx] = fm;
181           fm        = idx;
182           nnz++;
183         }
184       }
185       row = fill[row];
186     }
187     /* copy new filled row into permanent storage */
188     ainew[i+1] = ainew[i] + nnz;
189     if (ainew[i+1] > jmax) {
190 
191       /* estimate how much additional space we will need */
192       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
193       /* just double the memory each time */
194       PetscInt maxadd = jmax;
195       /* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */
196       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
197       jmax += maxadd;
198 
199       /* allocate a longer ajnew */
200       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
201       ierr  = PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(PetscInt));CHKERRQ(ierr);
202       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
203       ajnew = ajtmp;
204       reallocs++; /* count how many times we realloc */
205     }
206     ajtmp = ajnew + ainew[i];
207     fm    = fill[n];
208     nzi   = 0;
209     im[i] = nnz;
210     while (nnz--) {
211       if (fm < i) nzi++;
212       *ajtmp++ = fm;
213       fm       = fill[fm];
214     }
215     idnew[i] = ainew[i] + nzi;
216   }
217 
218 #if defined(PETSC_USE_INFO)
219   if (ai[n] != 0) {
220     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
221     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
222     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
223     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
224     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
225   } else {
226     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
227   }
228 #endif
229 
230   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
231   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
232 
233   ierr = PetscFree(fill);CHKERRQ(ierr);
234 
235   /* put together the new matrix */
236   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
237   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
238   b = (Mat_SeqBAIJ*)(*B)->data;
239   b->singlemalloc = PETSC_FALSE;
240   b->free_a     = PETSC_TRUE;
241   b->free_ij    = PETSC_TRUE;
242   ierr          = PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
243   b->j          = ajnew;
244   b->i          = ainew;
245   b->diag       = idnew;
246   b->ilen       = 0;
247   b->imax       = 0;
248   b->row        = isrow;
249   b->col        = iscol;
250   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
251   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
252   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
253   b->icol       = isicol;
254   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
255   /* In b structure:  Free imax, ilen, old a, old j.
256      Allocate idnew, solve_work, new a, new j */
257   ierr = PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
258   b->maxnz = b->nz = ainew[n];
259 
260   (*B)->factor                 = MAT_FACTOR_LU;
261   (*B)->info.factor_mallocs    = reallocs;
262   (*B)->info.fill_ratio_given  = f;
263   if (ai[n] != 0) {
264     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
265   } else {
266     (*B)->info.fill_ratio_needed = 0.0;
267   }
268   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
269   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
270   ierr = MatSeqBAIJSetNumericFactorization(*B,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272  }
273 
274