xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision ee45ca4afdde1af4d1deda7cd4dc1a4a63a3ea97)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
58   IS             iscolf,isicol,isirow;
59   PetscTruth     reorder;
60   PetscErrorCode ierr,sierr;
61   PetscInt       *c,*r,*ic,i,n = A->m;
62   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   PetscInt       *ordcol,*iwk,*iperm,*jw;
64   PetscInt       jmax,lfill,job,*o_i,*o_j;
65   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal      af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (PetscInt)(info->dtcount/2.0);
75   jmax    = (PetscInt)(info->fill*a->nz);
76 
77 
78   /* ------------------------------------------------------------
79      If reorder=.TRUE., then the original matrix has to be
80      reordered to reflect the user selected ordering scheme, and
81      then de-reordered so it is in it's original format.
82      Because Saad's dperm() is NOT in place, we have to copy
83      the original matrix and allocate more storage. . .
84      ------------------------------------------------------------
85   */
86 
87   /* set reorder to true if either isrow or iscol is not identity */
88   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
89   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
90   reorder = PetscNot(reorder);
91 
92 
93   /* storage for ilu factor */
94   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
95   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
97   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
98 
99   /* ------------------------------------------------------------
100      Make sure that everything is Fortran formatted (1-Based)
101      ------------------------------------------------------------
102   */
103   for (i=old_i[0];i<old_i[n];i++) {
104     old_j[i]++;
105   }
106   for(i=0;i<n+1;i++) {
107     old_i[i]++;
108   };
109 
110 
111   if (reorder) {
112     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
113     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
114     for(i=0;i<n;i++) {
115       r[i]  = r[i]+1;
116       c[i]  = c[i]+1;
117     }
118     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
119     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
121     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122     for (i=0;i<n;i++) {
123       r[i]  = r[i]-1;
124       c[i]  = c[i]-1;
125     }
126     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
127     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
128     o_a = old_a2;
129     o_j = old_j2;
130     o_i = old_i2;
131   } else {
132     o_a = old_a;
133     o_j = old_j;
134     o_i = old_i;
135   }
136 
137   /* ------------------------------------------------------------
138      Call Saad's ilutp() routine to generate the factorization
139      ------------------------------------------------------------
140   */
141 
142   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
144   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
145 
146   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147   if (sierr) {
148     switch (sierr) {
149       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155     }
156   }
157 
158   ierr = PetscFree(w);CHKERRQ(ierr);
159   ierr = PetscFree(jw);CHKERRQ(ierr);
160 
161   /* ------------------------------------------------------------
162      Saad's routine gives the result in Modified Sparse Row (msr)
163      Convert to Compressed Sparse Row format (csr)
164      ------------------------------------------------------------
165   */
166 
167   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
168   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
169 
170   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
171 
172   ierr = PetscFree(iwk);CHKERRQ(ierr);
173   ierr = PetscFree(wk);CHKERRQ(ierr);
174 
175   if (reorder) {
176     ierr = PetscFree(old_a2);CHKERRQ(ierr);
177     ierr = PetscFree(old_j2);CHKERRQ(ierr);
178     ierr = PetscFree(old_i2);CHKERRQ(ierr);
179   } else {
180     /* fix permutation of old_j that the factorization introduced */
181     for (i=old_i[0]; i<old_i[n]; i++) {
182       old_j[i-1] = iperm[old_j[i-1]-1];
183     }
184   }
185 
186   /* get rid of the shift to indices starting at 1 */
187   for (i=0; i<n+1; i++) {
188     old_i[i]--;
189   }
190   for (i=old_i[0];i<old_i[n];i++) {
191     old_j[i]--;
192   }
193 
194   /* Make the factored matrix 0-based */
195   for (i=0; i<n+1; i++) {
196     new_i[i]--;
197   }
198   for (i=new_i[0];i<new_i[n];i++) {
199     new_j[i]--;
200   }
201 
202   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203   /*-- permute the right-hand-side and solution vectors           --*/
204   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
205   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
206   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
207   for(i=0; i<n; i++) {
208     ordcol[i] = ic[iperm[i]-1];
209   };
210   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
211   ierr = ISDestroy(isicol);CHKERRQ(ierr);
212 
213   ierr = PetscFree(iperm);CHKERRQ(ierr);
214 
215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
216   ierr = PetscFree(ordcol);CHKERRQ(ierr);
217 
218   /*----- put together the new matrix -----*/
219 
220   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
221   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
222   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
223   (*fact)->factor    = FACTOR_LU;
224   (*fact)->assembled = PETSC_TRUE;
225 
226   b = (Mat_SeqAIJ*)(*fact)->data;
227   ierr = PetscFree(b->imax);CHKERRQ(ierr);
228   b->sorted        = PETSC_FALSE;
229   b->singlemalloc  = PETSC_FALSE;
230   /* the next line frees the default space generated by the MatCreate() */
231   ierr             = PetscFree(b->a);CHKERRQ(ierr);
232   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
247 
248   /* check out for identical nodes. If found, use inode functions */
249   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 /*
262     Factorization code for AIJ format.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267 {
268   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
269   IS             isicol;
270   PetscErrorCode ierr;
271   PetscInt       *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274   PetscReal      f;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
284   ainew[0] = 0;
285   /* don't know how many column pointers are needed so estimate */
286   f = info->fill;
287   jmax  = (PetscInt)(f*ai[n]+1);
288   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
289   /* fill is a linked list of nonzeros in active row */
290   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
291   im   = fill + n + 1;
292   /* idnew is location of diagonal in factor */
293   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
294   idnew[0] = 0;
295 
296   for (i=0; i<n; i++) {
297     /* first copy previous fill into linked list */
298     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
299     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300     ajtmp   = aj + ai[r[i]];
301     fill[n] = n;
302     while (nz--) {
303       fm  = n;
304       idx = ic[*ajtmp++];
305       do {
306         m  = fm;
307         fm = fill[m];
308       } while (fm < idx);
309       fill[m]   = idx;
310       fill[idx] = fm;
311     }
312     row = fill[n];
313     while (row < i) {
314       ajtmp = ajnew + idnew[row] + 1;
315       nzbd  = 1 + idnew[row] - ainew[row];
316       nz    = im[row] - nzbd;
317       fm    = row;
318       while (nz-- > 0) {
319         idx = *ajtmp++ ;
320         nzbd++;
321         if (idx == i) im[row] = nzbd;
322         do {
323           m  = fm;
324           fm = fill[m];
325         } while (fm < idx);
326         if (fm != idx) {
327           fill[m]   = idx;
328           fill[idx] = fm;
329           fm        = idx;
330           nnz++;
331         }
332       }
333       row = fill[row];
334     }
335     /* copy new filled row into permanent storage */
336     ainew[i+1] = ainew[i] + nnz;
337     if (ainew[i+1] > jmax) {
338       /* estimate how much additional space we will need */
339       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
340       /* just double the memory each time */
341       PetscInt maxadd = jmax;
342       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
343       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
344       jmax += maxadd;
345 
346       /* allocate a longer ajnew */
347       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
348       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr);
349       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
350       ajnew = ajtmp;
351       reallocs++; /* count how many times we realloc */
352     }
353     ajtmp = ajnew + ainew[i];
354     fm    = fill[n];
355     nzi   = 0;
356     im[i] = nnz;
357     while (nnz--) {
358       if (fm < i) nzi++;
359       *ajtmp++ = fm ;
360       fm       = fill[fm];
361     }
362     idnew[i] = ainew[i] + nzi;
363   }
364   if (ai[n] != 0) {
365     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
366     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
367     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
370   } else {
371     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
372   }
373 
374   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
375   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
376 
377   ierr = PetscFree(fill);CHKERRQ(ierr);
378 
379   /* put together the new matrix */
380   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
381   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
382   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
383   PetscLogObjectParent(*B,isicol);
384   b    = (Mat_SeqAIJ*)(*B)->data;
385   ierr = PetscFree(b->imax);CHKERRQ(ierr);
386   b->singlemalloc = PETSC_FALSE;
387   /* the next line frees the default space generated by the Create() */
388   ierr = PetscFree(b->a);CHKERRQ(ierr);
389   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
390   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
391   b->j          = ajnew;
392   b->i          = ainew;
393   b->diag       = idnew;
394   b->ilen       = 0;
395   b->imax       = 0;
396   b->row        = isrow;
397   b->col        = iscol;
398   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
399   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
400   b->icol       = isicol;
401   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
402   /* In b structure:  Free imax, ilen, old a, old j.
403      Allocate idnew, solve_work, new a, new j */
404   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
405   b->maxnz = b->nz = ainew[n] ;
406 
407   (*B)->factor                 =  FACTOR_LU;
408   (*B)->info.factor_mallocs    = reallocs;
409   (*B)->info.fill_ratio_given  = f;
410   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
411   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
412 
413   if (ai[n] != 0) {
414     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
415   } else {
416     (*B)->info.fill_ratio_needed = 0.0;
417   }
418   PetscFunctionReturn(0);
419 }
420 /* ----------------------------------------------------------- */
421 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
425 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
426 {
427   Mat            C=*B;
428   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
429   IS             isrow = b->row,isicol = b->icol;
430   PetscErrorCode ierr;
431   PetscInt       *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j;
432   PetscInt       *ajtmpold,*ajtmp,nz,row,*ics;
433   PetscInt       *diag_offset = b->diag,diag,*pj,nshift=0;
434   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
435   PetscReal      zeropivot,rs,d,shift_nonzero;
436   PetscReal      row_shift,shift_fraction,shift_amount,shift_lo=0.,shift_hi=1.,shift_top=0.;
437   PetscTruth     lushift,shift_pd;
438 
439   PetscFunctionBegin;
440   shift_nonzero  = info->shiftnz;
441   shift_pd       = info->shiftpd;
442   zeropivot      = info->zeropivot;
443   shift_fraction = info->shift_fraction;
444 
445   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
446   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
447   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
448   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
449   rtmps = rtmp; ics = ic;
450 
451   if (!a->diag) {
452     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
453   }
454   /* if both shift schemes are chosen by user, only use shift_pd */
455   if (shift_pd && shift_nonzero) shift_nonzero = 0.0;
456   if (shift_pd) { /* set shift_top=max{row_shift} */
457     PetscInt *aai = a->i,*ddiag = a->diag;
458     shift_top = 0;
459     for (i=0; i<n; i++) {
460       d = PetscAbsScalar((a->a)[ddiag[i]]);
461       /* calculate amt of shift needed for this row */
462       if (d<=0) {
463         row_shift = 0;
464       } else {
465         row_shift = -2*d;
466       }
467       v  = a->a+aai[i];
468       nz = aai[i+1] - aai[i];
469       for (j=0; j<nz; j++)
470 	row_shift += PetscAbsScalar(v[j]);
471       if (row_shift>shift_top) shift_top = row_shift;
472     }
473   }
474 
475   /* shift_fraction = b->lu_shift_fraction; */
476   shift_amount   = 0;
477   do {
478     lushift = PETSC_FALSE;
479     for (i=0; i<n; i++){
480       nz    = ai[i+1] - ai[i];
481       ajtmp = aj + ai[i];
482       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
483 
484       /* load in initial (unfactored row) */
485       nz       = a->i[r[i]+1] - a->i[r[i]];
486       ajtmpold = a->j + a->i[r[i]];
487       v        = a->a + a->i[r[i]];
488       for (j=0; j<nz; j++) {
489         rtmp[ics[ajtmpold[j]]] = v[j];
490       }
491       rtmp[ics[r[i]]] += shift_amount; /* shift the diagonal of the matrix */
492 
493       row = *ajtmp++;
494       while  (row < i) {
495         pc = rtmp + row;
496         if (*pc != 0.0) {
497           pv         = b->a + diag_offset[row];
498           pj         = b->j + diag_offset[row] + 1;
499           multiplier = *pc / *pv++;
500           *pc        = multiplier;
501           nz         = ai[row+1] - diag_offset[row] - 1;
502           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
503           PetscLogFlops(2*nz);
504         }
505         row = *ajtmp++;
506       }
507       /* finished row so stick it into b->a */
508       pv   = b->a + ai[i] ;
509       pj   = b->j + ai[i] ;
510       nz   = ai[i+1] - ai[i];
511       diag = diag_offset[i] - ai[i];
512       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
513       rs   = 0.0;
514       for (j=0; j<nz; j++) {
515         pv[j] = rtmps[pj[j]];
516         if (j != diag) rs += PetscAbsScalar(pv[j]);
517       }
518       /* shift the diagonals when zero pivot is detected */
519 #define MAX_NSHIFT 5
520       if (PetscAbsScalar(pv[diag]) <= zeropivot*rs && shift_nonzero){
521         /* force |diag(*B)| > zeropivot*rs */
522         if (!nshift){
523           shift_amount = shift_nonzero;
524         } else {
525           shift_amount *= 2.0;
526         }
527         lushift = PETSC_TRUE;
528         nshift++;
529         break;
530       } else if (PetscRealPart(pv[diag]) <= zeropivot*rs && shift_pd){
531         /* force *B to be diagonally dominant */
532 	if (nshift>MAX_NSHIFT) {
533 	  SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner");
534 	} else if (nshift==MAX_NSHIFT) {
535 	  shift_fraction = shift_hi;
536 	  lushift        = PETSC_FALSE;
537 	} else {
538 	  shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
539 	  lushift  = PETSC_TRUE;
540 	}
541 	shift_amount = shift_fraction * shift_top;
542         nshift++;
543         break;
544       } else if (PetscAbsScalar(pv[diag]) <= zeropivot*rs){
545         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
546       }
547     }
548     if (shift_pd && !lushift && shift_fraction>0 && nshift<MAX_NSHIFT) {
549       /*
550        * if no shift in this attempt & shifting & started shifting & can refine,
551        * then try lower shift
552        */
553       shift_hi       = shift_fraction;
554       shift_fraction = (shift_hi+shift_lo)/2.;
555       shift_amount   = shift_fraction * shift_top;
556       lushift        = PETSC_TRUE;
557       nshift++;
558     }
559   } while (lushift);
560 
561   /* invert diagonal entries for simplier triangular solves */
562   for (i=0; i<n; i++) {
563     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
564   }
565 
566   ierr = PetscFree(rtmp);CHKERRQ(ierr);
567   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
568   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
569   C->factor = FACTOR_LU;
570   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
571   C->assembled = PETSC_TRUE;
572   PetscLogFlops(C->n);
573   if (nshift){
574     if (shift_nonzero) {
575       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount);
576     } else if (shift_pd) {
577       b->lu_shift_fraction = shift_fraction;
578       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift);
579     }
580   }
581   PetscFunctionReturn(0);
582 }
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
586 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
587 {
588   PetscFunctionBegin;
589   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
590   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
591   PetscFunctionReturn(0);
592 }
593 
594 
595 /* ----------------------------------------------------------- */
596 #undef __FUNCT__
597 #define __FUNCT__ "MatLUFactor_SeqAIJ"
598 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
599 {
600   PetscErrorCode ierr;
601   Mat            C;
602 
603   PetscFunctionBegin;
604   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
605   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
606   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
607   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
608   PetscFunctionReturn(0);
609 }
610 /* ----------------------------------------------------------- */
611 #undef __FUNCT__
612 #define __FUNCT__ "MatSolve_SeqAIJ"
613 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
614 {
615   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
616   IS             iscol = a->col,isrow = a->row;
617   PetscErrorCode ierr;
618   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
619   PetscInt       nz,*rout,*cout;
620   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
621 
622   PetscFunctionBegin;
623   if (!n) PetscFunctionReturn(0);
624 
625   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
626   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
627   tmp  = a->solve_work;
628 
629   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
630   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
631 
632   /* forward solve the lower triangular */
633   tmp[0] = b[*r++];
634   tmps   = tmp;
635   for (i=1; i<n; i++) {
636     v   = aa + ai[i] ;
637     vi  = aj + ai[i] ;
638     nz  = a->diag[i] - ai[i];
639     sum = b[*r++];
640     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
641     tmp[i] = sum;
642   }
643 
644   /* backward solve the upper triangular */
645   for (i=n-1; i>=0; i--){
646     v   = aa + a->diag[i] + 1;
647     vi  = aj + a->diag[i] + 1;
648     nz  = ai[i+1] - a->diag[i] - 1;
649     sum = tmp[i];
650     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
651     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
652   }
653 
654   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
655   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
656   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
657   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
658   PetscLogFlops(2*a->nz - A->n);
659   PetscFunctionReturn(0);
660 }
661 
662 /* ----------------------------------------------------------- */
663 #undef __FUNCT__
664 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
665 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
666 {
667   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
668   PetscErrorCode ierr;
669   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
670   PetscScalar    *x,*b,*aa = a->a;
671 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
672   PetscInt       adiag_i,i,*vi,nz,ai_i;
673   PetscScalar    *v,sum;
674 #endif
675 
676   PetscFunctionBegin;
677   if (!n) PetscFunctionReturn(0);
678 
679   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
680   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
681 
682 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
683   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
684 #else
685   /* forward solve the lower triangular */
686   x[0] = b[0];
687   for (i=1; i<n; i++) {
688     ai_i = ai[i];
689     v    = aa + ai_i;
690     vi   = aj + ai_i;
691     nz   = adiag[i] - ai_i;
692     sum  = b[i];
693     while (nz--) sum -= *v++ * x[*vi++];
694     x[i] = sum;
695   }
696 
697   /* backward solve the upper triangular */
698   for (i=n-1; i>=0; i--){
699     adiag_i = adiag[i];
700     v       = aa + adiag_i + 1;
701     vi      = aj + adiag_i + 1;
702     nz      = ai[i+1] - adiag_i - 1;
703     sum     = x[i];
704     while (nz--) sum -= *v++ * x[*vi++];
705     x[i]    = sum*aa[adiag_i];
706   }
707 #endif
708   PetscLogFlops(2*a->nz - A->n);
709   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
710   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
716 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
717 {
718   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
719   IS             iscol = a->col,isrow = a->row;
720   PetscErrorCode ierr;
721   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
722   PetscInt       nz,*rout,*cout;
723   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
724 
725   PetscFunctionBegin;
726   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
727 
728   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
729   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
730   tmp  = a->solve_work;
731 
732   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
733   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
734 
735   /* forward solve the lower triangular */
736   tmp[0] = b[*r++];
737   for (i=1; i<n; i++) {
738     v   = aa + ai[i] ;
739     vi  = aj + ai[i] ;
740     nz  = a->diag[i] - ai[i];
741     sum = b[*r++];
742     while (nz--) sum -= *v++ * tmp[*vi++ ];
743     tmp[i] = sum;
744   }
745 
746   /* backward solve the upper triangular */
747   for (i=n-1; i>=0; i--){
748     v   = aa + a->diag[i] + 1;
749     vi  = aj + a->diag[i] + 1;
750     nz  = ai[i+1] - a->diag[i] - 1;
751     sum = tmp[i];
752     while (nz--) sum -= *v++ * tmp[*vi++ ];
753     tmp[i] = sum*aa[a->diag[i]];
754     x[*c--] += tmp[i];
755   }
756 
757   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
758   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
759   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
760   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
761   PetscLogFlops(2*a->nz);
762 
763   PetscFunctionReturn(0);
764 }
765 /* -------------------------------------------------------------------*/
766 #undef __FUNCT__
767 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
768 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
769 {
770   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
771   IS             iscol = a->col,isrow = a->row;
772   PetscErrorCode ierr;
773   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
774   PetscInt       nz,*rout,*cout,*diag = a->diag;
775   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
776 
777   PetscFunctionBegin;
778   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
779   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
780   tmp  = a->solve_work;
781 
782   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
783   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
784 
785   /* copy the b into temp work space according to permutation */
786   for (i=0; i<n; i++) tmp[i] = b[c[i]];
787 
788   /* forward solve the U^T */
789   for (i=0; i<n; i++) {
790     v   = aa + diag[i] ;
791     vi  = aj + diag[i] + 1;
792     nz  = ai[i+1] - diag[i] - 1;
793     s1  = tmp[i];
794     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
795     while (nz--) {
796       tmp[*vi++ ] -= (*v++)*s1;
797     }
798     tmp[i] = s1;
799   }
800 
801   /* backward solve the L^T */
802   for (i=n-1; i>=0; i--){
803     v   = aa + diag[i] - 1 ;
804     vi  = aj + diag[i] - 1 ;
805     nz  = diag[i] - ai[i];
806     s1  = tmp[i];
807     while (nz--) {
808       tmp[*vi-- ] -= (*v--)*s1;
809     }
810   }
811 
812   /* copy tmp into x according to permutation */
813   for (i=0; i<n; i++) x[r[i]] = tmp[i];
814 
815   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
816   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
817   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
818   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
819 
820   PetscLogFlops(2*a->nz-A->n);
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
826 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
827 {
828   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
829   IS             iscol = a->col,isrow = a->row;
830   PetscErrorCode ierr;
831   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
832   PetscInt       nz,*rout,*cout,*diag = a->diag;
833   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
834 
835   PetscFunctionBegin;
836   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
837 
838   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
839   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
840   tmp = a->solve_work;
841 
842   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
843   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
844 
845   /* copy the b into temp work space according to permutation */
846   for (i=0; i<n; i++) tmp[i] = b[c[i]];
847 
848   /* forward solve the U^T */
849   for (i=0; i<n; i++) {
850     v   = aa + diag[i] ;
851     vi  = aj + diag[i] + 1;
852     nz  = ai[i+1] - diag[i] - 1;
853     tmp[i] *= *v++;
854     while (nz--) {
855       tmp[*vi++ ] -= (*v++)*tmp[i];
856     }
857   }
858 
859   /* backward solve the L^T */
860   for (i=n-1; i>=0; i--){
861     v   = aa + diag[i] - 1 ;
862     vi  = aj + diag[i] - 1 ;
863     nz  = diag[i] - ai[i];
864     while (nz--) {
865       tmp[*vi-- ] -= (*v--)*tmp[i];
866     }
867   }
868 
869   /* copy tmp into x according to permutation */
870   for (i=0; i<n; i++) x[r[i]] += tmp[i];
871 
872   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
873   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
874   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
875   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
876 
877   PetscLogFlops(2*a->nz);
878   PetscFunctionReturn(0);
879 }
880 /* ----------------------------------------------------------------*/
881 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
882 
883 #undef __FUNCT__
884 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
885 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
886 {
887   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
888   IS             isicol;
889   PetscErrorCode ierr;
890   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
891   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
892   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
893   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
894   PetscTruth     col_identity,row_identity;
895   PetscReal      f;
896 
897   PetscFunctionBegin;
898   f             = info->fill;
899   levels        = (PetscInt)info->levels;
900   diagonal_fill = (PetscInt)info->diagonal_fill;
901   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
902 
903   /* special case that simply copies fill pattern */
904   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
905   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
906   if (!levels && row_identity && col_identity) {
907     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
908     (*fact)->factor = FACTOR_LU;
909     b               = (Mat_SeqAIJ*)(*fact)->data;
910     if (!b->diag) {
911       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
912     }
913     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
914     b->row              = isrow;
915     b->col              = iscol;
916     b->icol             = isicol;
917     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
918     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
919     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
920     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
921     PetscFunctionReturn(0);
922   }
923 
924   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
925   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
926 
927   /* get new row pointers */
928   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
929   ainew[0] = 0;
930   /* don't know how many column pointers are needed so estimate */
931   jmax = (PetscInt)(f*(ai[n]+1));
932   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
933   /* ajfill is level of fill for each fill entry */
934   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
935   /* fill is a linked list of nonzeros in active row */
936   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
937   /* im is level for each filled value */
938   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
939   /* dloc is location of diagonal in factor */
940   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
941   dloc[0]  = 0;
942   for (prow=0; prow<n; prow++) {
943 
944     /* copy current row into linked list */
945     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
946     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
947     xi      = aj + ai[r[prow]];
948     fill[n] = n;
949     fill[prow] = -1; /* marker to indicate if diagonal exists */
950     while (nz--) {
951       fm  = n;
952       idx = ic[*xi++ ];
953       do {
954         m  = fm;
955         fm = fill[m];
956       } while (fm < idx);
957       fill[m]   = idx;
958       fill[idx] = fm;
959       im[idx]   = 0;
960     }
961 
962     /* make sure diagonal entry is included */
963     if (diagonal_fill && fill[prow] == -1) {
964       fm = n;
965       while (fill[fm] < prow) fm = fill[fm];
966       fill[prow] = fill[fm]; /* insert diagonal into linked list */
967       fill[fm]   = prow;
968       im[prow]   = 0;
969       nzf++;
970       dcount++;
971     }
972 
973     nzi = 0;
974     row = fill[n];
975     while (row < prow) {
976       incrlev = im[row] + 1;
977       nz      = dloc[row];
978       xi      = ajnew  + ainew[row]  + nz + 1;
979       flev    = ajfill + ainew[row]  + nz + 1;
980       nnz     = ainew[row+1] - ainew[row] - nz - 1;
981       fm      = row;
982       while (nnz-- > 0) {
983         idx = *xi++ ;
984         if (*flev + incrlev > levels) {
985           flev++;
986           continue;
987         }
988         do {
989           m  = fm;
990           fm = fill[m];
991         } while (fm < idx);
992         if (fm != idx) {
993           im[idx]   = *flev + incrlev;
994           fill[m]   = idx;
995           fill[idx] = fm;
996           fm        = idx;
997           nzf++;
998         } else {
999           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
1000         }
1001         flev++;
1002       }
1003       row = fill[row];
1004       nzi++;
1005     }
1006     /* copy new filled row into permanent storage */
1007     ainew[prow+1] = ainew[prow] + nzf;
1008     if (ainew[prow+1] > jmax) {
1009 
1010       /* estimate how much additional space we will need */
1011       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1012       /* just double the memory each time */
1013       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1014       PetscInt maxadd = jmax;
1015       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1016       jmax += maxadd;
1017 
1018       /* allocate a longer ajnew and ajfill */
1019       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1020       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1021       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1022       ajnew  = xi;
1023       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1024       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1025       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1026       ajfill = xi;
1027       reallocs++; /* count how many times we realloc */
1028     }
1029     xi          = ajnew + ainew[prow] ;
1030     flev        = ajfill + ainew[prow] ;
1031     dloc[prow]  = nzi;
1032     fm          = fill[n];
1033     while (nzf--) {
1034       *xi++   = fm ;
1035       *flev++ = im[fm];
1036       fm      = fill[fm];
1037     }
1038     /* make sure row has diagonal entry */
1039     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1040       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1041     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1042     }
1043   }
1044   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1045   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1046   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1047   ierr = PetscFree(fill);CHKERRQ(ierr);
1048   ierr = PetscFree(im);CHKERRQ(ierr);
1049 
1050   {
1051     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1052     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1053     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1054     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1055     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1056     if (diagonal_fill) {
1057       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1058     }
1059   }
1060 
1061   /* put together the new matrix */
1062   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1063   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1064   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1065   PetscLogObjectParent(*fact,isicol);
1066   b = (Mat_SeqAIJ*)(*fact)->data;
1067   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1068   b->singlemalloc = PETSC_FALSE;
1069   len = (ainew[n] )*sizeof(PetscScalar);
1070   /* the next line frees the default space generated by the Create() */
1071   ierr = PetscFree(b->a);CHKERRQ(ierr);
1072   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1073   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1074   b->j          = ajnew;
1075   b->i          = ainew;
1076   for (i=0; i<n; i++) dloc[i] += ainew[i];
1077   b->diag       = dloc;
1078   b->ilen       = 0;
1079   b->imax       = 0;
1080   b->row        = isrow;
1081   b->col        = iscol;
1082   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1083   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1084   b->icol       = isicol;
1085   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1086   /* In b structure:  Free imax, ilen, old a, old j.
1087      Allocate dloc, solve_work, new a, new j */
1088   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1089   b->maxnz             = b->nz = ainew[n] ;
1090   (*fact)->factor = FACTOR_LU;
1091   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1092   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1093   (*fact)->info.factor_mallocs    = reallocs;
1094   (*fact)->info.fill_ratio_given  = f;
1095   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #include "src/mat/impls/sbaij/seq/sbaij.h"
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1102 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1103 {
1104   Mat            C = *B;
1105   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1106   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1107   IS             ip=b->row;
1108   PetscErrorCode ierr;
1109   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1110   PetscInt       *ai=a->i,*aj=a->j;
1111   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1112   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1113   PetscReal      zeropivot,shift_amount,rs,shift_nonzero;
1114   PetscTruth     chshift,shift_pd;
1115   PetscInt       nshift=0;
1116 
1117   PetscFunctionBegin;
1118   shift_nonzero  = info->shiftnz;
1119   shift_pd       = info->shiftpd;
1120   zeropivot      = info->zeropivot;
1121 
1122   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1123 
1124   /* initialization */
1125   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1126   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1127   jl   = il + mbs;
1128   rtmp = (MatScalar*)(jl + mbs);
1129 
1130   shift_amount = 0;
1131   do {
1132     chshift = PETSC_FALSE;
1133     for (i=0; i<mbs; i++) {
1134       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1135     }
1136 
1137     for (k = 0; k<mbs; k++){
1138       bval = ba + bi[k];
1139       /* initialize k-th row by the perm[k]-th row of A */
1140       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1141       for (j = jmin; j < jmax; j++){
1142         col = rip[aj[j]];
1143         if (col >= k){ /* only take upper triangular entry */
1144           rtmp[col] = aa[j];
1145           *bval++  = 0.0; /* for in-place factorization */
1146         }
1147       }
1148       /* shift the diagonal of the matrix */
1149       if (nshift) rtmp[k] += shift_amount;
1150 
1151       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1152       dk = rtmp[k];
1153       i = jl[k]; /* first row to be added to k_th row  */
1154 
1155       while (i < k){
1156         nexti = jl[i]; /* next row to be added to k_th row */
1157 
1158         /* compute multiplier, update diag(k) and U(i,k) */
1159         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1160         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1161         dk += uikdi*ba[ili];
1162         ba[ili] = uikdi; /* -U(i,k) */
1163 
1164         /* add multiple of row i to k-th row */
1165         jmin = ili + 1; jmax = bi[i+1];
1166         if (jmin < jmax){
1167           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1168           /* update il and jl for row i */
1169           il[i] = jmin;
1170           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1171         }
1172         i = nexti;
1173       }
1174 
1175       /* shift the diagonals when zero pivot is detected */
1176       /* compute rs=sum of abs(off-diagonal) */
1177       rs   = 0.0;
1178       jmin = bi[k]+1;
1179       nz   = bi[k+1] - jmin;
1180       if (nz){
1181         bcol = bj + jmin;
1182         while (nz--){
1183           rs += PetscAbsScalar(rtmp[*bcol++]);
1184         }
1185       }
1186       if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){
1187         /* force |diag(*B)| > zeropivot*rs */
1188         if (!nshift){
1189           shift_amount = shift_nonzero;
1190         } else {
1191           shift_amount *= 2.0;
1192         }
1193         chshift = PETSC_TRUE;
1194         nshift++;
1195         break;
1196       } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){
1197         /* calculate a shift that would make this row diagonally dominant */
1198 	shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount);
1199 	chshift      = PETSC_TRUE;
1200 	/* Unlike in the ILU case there is no exit condition on nshift:
1201 	   we increase the shift until it converges. There is no guarantee that
1202 	   this algorithm converges faster or slower, or is better or worse
1203 	   than the ILU algorithm. */
1204 	nshift++;
1205 	break;
1206       } else if (PetscAbsScalar(dk) <= zeropivot*rs){
1207         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1208       }
1209 
1210       /* copy data into U(k,:) */
1211       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1212       jmin = bi[k]+1; jmax = bi[k+1];
1213       if (jmin < jmax) {
1214         for (j=jmin; j<jmax; j++){
1215           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1216         }
1217         /* add the k-th row into il and jl */
1218         il[k] = jmin;
1219         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1220       }
1221     }
1222   } while (chshift);
1223   ierr = PetscFree(il);CHKERRQ(ierr);
1224 
1225   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1226   C->factor       = FACTOR_CHOLESKY;
1227   C->assembled    = PETSC_TRUE;
1228   C->preallocated = PETSC_TRUE;
1229   PetscLogFlops(C->m);
1230   if (nshift){
1231     if (shift_nonzero) {
1232       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount);
1233     } else if (shift_pd) {
1234       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount);
1235     }
1236   }
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #include "petscbt.h"
1241 #include "src/mat/utils/freespace.h"
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1244 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1245 {
1246   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1247   Mat_SeqSBAIJ   *b;
1248   Mat            B;
1249   PetscErrorCode ierr;
1250   PetscTruth     perm_identity;
1251   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1252   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1253   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1254   PetscReal      fill=info->fill,levels=info->levels;
1255   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1256   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1257   PetscBT        lnkbt;
1258 
1259   PetscFunctionBegin;
1260   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1261   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1262 
1263   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1264   ui[0] = 0;
1265 
1266   /* special case that simply copies fill pattern */
1267   if (!levels && perm_identity) {
1268     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1269     for (i=0; i<am; i++) {
1270       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1271     }
1272     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1273     cols = uj;
1274     for (i=0; i<am; i++) {
1275       aj    = a->j + a->diag[i];
1276       ncols = ui[i+1] - ui[i];
1277       for (j=0; j<ncols; j++) *cols++ = *aj++;
1278     }
1279   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1280     /* initialization */
1281     ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1282 
1283     /* jl: linked list for storing indices of the pivot rows
1284        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1285     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1286     il         = jl + am;
1287     uj_ptr     = (PetscInt**)(il + am);
1288     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1289     for (i=0; i<am; i++){
1290       jl[i] = am; il[i] = 0;
1291     }
1292 
1293     /* create and initialize a linked list for storing column indices of the active row k */
1294     nlnk = am + 1;
1295     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1296 
1297     /* initial FreeSpace size is fill*(ai[am]+1) */
1298     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1299     current_space = free_space;
1300     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1301     current_space_lvl = free_space_lvl;
1302 
1303     for (k=0; k<am; k++){  /* for each active row k */
1304       /* initialize lnk by the column indices of row rip[k] of A */
1305       nzk   = 0;
1306       ncols = ai[rip[k]+1] - ai[rip[k]];
1307       ncols_upper = 0;
1308       cols        = cols_lvl + am;
1309       for (j=0; j<ncols; j++){
1310         i = rip[*(aj + ai[rip[k]] + j)];
1311         if (i >= k){ /* only take upper triangular entry */
1312           cols[ncols_upper] = i;
1313           cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
1314           ncols_upper++;
1315         }
1316       }
1317       ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1318       nzk += nlnk;
1319 
1320       /* update lnk by computing fill-in for each pivot row to be merged in */
1321       prow = jl[k]; /* 1st pivot row */
1322 
1323       while (prow < k){
1324         nextprow = jl[prow];
1325 
1326         /* merge prow into k-th row */
1327         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1328         jmax = ui[prow+1];
1329         ncols = jmax-jmin;
1330         i     = jmin - ui[prow];
1331         cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1332         for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1333         ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1334         nzk += nlnk;
1335 
1336         /* update il and jl for prow */
1337         if (jmin < jmax){
1338           il[prow] = jmin;
1339           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1340         }
1341         prow = nextprow;
1342       }
1343 
1344       /* if free space is not available, make more free space */
1345       if (current_space->local_remaining<nzk) {
1346         i = am - k + 1; /* num of unfactored rows */
1347         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1348         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1349         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1350         reallocs++;
1351       }
1352 
1353       /* copy data into free_space and free_space_lvl, then initialize lnk */
1354       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1355 
1356       /* add the k-th row into il and jl */
1357       if (nzk > 1){
1358         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1359         jl[k] = jl[i]; jl[i] = k;
1360         il[k] = ui[k] + 1;
1361       }
1362       uj_ptr[k]     = current_space->array;
1363       uj_lvl_ptr[k] = current_space_lvl->array;
1364 
1365       current_space->array           += nzk;
1366       current_space->local_used      += nzk;
1367       current_space->local_remaining -= nzk;
1368 
1369       current_space_lvl->array           += nzk;
1370       current_space_lvl->local_used      += nzk;
1371       current_space_lvl->local_remaining -= nzk;
1372 
1373       ui[k+1] = ui[k] + nzk;
1374     }
1375 
1376     if (ai[am] != 0) {
1377       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1378       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1379       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1380       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1381     } else {
1382       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1383     }
1384 
1385     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1386     ierr = PetscFree(jl);CHKERRQ(ierr);
1387     ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1388 
1389     /* destroy list of free space and other temporary array(s) */
1390     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1391     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1392     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1393     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1394 
1395   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1396 
1397   /* put together the new matrix in MATSEQSBAIJ format */
1398   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1399   B = *fact;
1400   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1401   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1402 
1403   b    = (Mat_SeqSBAIJ*)B->data;
1404   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1405   b->singlemalloc = PETSC_FALSE;
1406   /* the next line frees the default space generated by the Create() */
1407   ierr = PetscFree(b->a);CHKERRQ(ierr);
1408   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1409   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1410   b->j    = uj;
1411   b->i    = ui;
1412   b->diag = 0;
1413   b->ilen = 0;
1414   b->imax = 0;
1415   b->row  = perm;
1416   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1417   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1418   b->icol = perm;
1419   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1420   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1421   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1422   b->maxnz = b->nz = ui[am];
1423 
1424   B->factor                 = FACTOR_CHOLESKY;
1425   B->info.factor_mallocs    = reallocs;
1426   B->info.fill_ratio_given  = fill;
1427   if (ai[am] != 0) {
1428     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1429   } else {
1430     B->info.fill_ratio_needed = 0.0;
1431   }
1432   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1433   if (perm_identity){
1434     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1435     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1436   }
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1442 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1443 {
1444   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1445   Mat_SeqSBAIJ   *b;
1446   Mat            B;
1447   PetscErrorCode ierr;
1448   PetscTruth     perm_identity;
1449   PetscReal      fill = info->fill;
1450   PetscInt       *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1451   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1452   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1453   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1454   PetscBT        lnkbt;
1455   IS             iperm;
1456 
1457   PetscFunctionBegin;
1458   /* check whether perm is the identity mapping */
1459   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1460   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1461 
1462   if (!perm_identity){
1463     /* check if perm is symmetric! */
1464     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1465     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1466     for (i=0; i<mbs; i++) {
1467       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1468     }
1469     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1470     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1471   }
1472 
1473   /* initialization */
1474   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1475   ui[0] = 0;
1476 
1477   /* jl: linked list for storing indices of the pivot rows
1478      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
1479   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1480   il     = jl + mbs;
1481   cols   = il + mbs;
1482   ui_ptr = (PetscInt**)(cols + mbs);
1483   for (i=0; i<mbs; i++){
1484     jl[i] = mbs; il[i] = 0;
1485   }
1486 
1487   /* create and initialize a linked list for storing column indices of the active row k */
1488   nlnk = mbs + 1;
1489   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1490 
1491   /* initial FreeSpace size is fill*(ai[mbs]+1) */
1492   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
1493   current_space = free_space;
1494 
1495   for (k=0; k<mbs; k++){  /* for each active row k */
1496     /* initialize lnk by the column indices of row rip[k] of A */
1497     nzk   = 0;
1498     ncols = ai[rip[k]+1] - ai[rip[k]];
1499     ncols_upper = 0;
1500     for (j=0; j<ncols; j++){
1501       i = rip[*(aj + ai[rip[k]] + j)];
1502       if (i >= k){ /* only take upper triangular entry */
1503         cols[ncols_upper] = i;
1504         ncols_upper++;
1505       }
1506     }
1507     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1508     nzk += nlnk;
1509 
1510     /* update lnk by computing fill-in for each pivot row to be merged in */
1511     prow = jl[k]; /* 1st pivot row */
1512 
1513     while (prow < k){
1514       nextprow = jl[prow];
1515       /* merge prow into k-th row */
1516       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
1517       jmax = ui[prow+1];
1518       ncols = jmax-jmin;
1519       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
1520       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1521       nzk += nlnk;
1522 
1523       /* update il and jl for prow */
1524       if (jmin < jmax){
1525         il[prow] = jmin;
1526         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1527       }
1528       prow = nextprow;
1529     }
1530 
1531     /* if free space is not available, make more free space */
1532     if (current_space->local_remaining<nzk) {
1533       i = mbs - k + 1; /* num of unfactored rows */
1534       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1535       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1536       reallocs++;
1537     }
1538 
1539     /* copy data into free space, then initialize lnk */
1540     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1541 
1542     /* add the k-th row into il and jl */
1543     if (nzk-1 > 0){
1544       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1545       jl[k] = jl[i]; jl[i] = k;
1546       il[k] = ui[k] + 1;
1547     }
1548     ui_ptr[k] = current_space->array;
1549     current_space->array           += nzk;
1550     current_space->local_used      += nzk;
1551     current_space->local_remaining -= nzk;
1552 
1553     ui[k+1] = ui[k] + nzk;
1554   }
1555 
1556   if (ai[mbs] != 0) {
1557     PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]);
1558     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1559     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1560     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1561   } else {
1562      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1563   }
1564 
1565   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1566   ierr = PetscFree(jl);CHKERRQ(ierr);
1567 
1568   /* destroy list of free space and other temporary array(s) */
1569   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1570   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1571   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1572 
1573   /* put together the new matrix in MATSEQSBAIJ format */
1574   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
1575   B    = *fact;
1576   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1577   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1578 
1579   b = (Mat_SeqSBAIJ*)B->data;
1580   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1581   b->singlemalloc = PETSC_FALSE;
1582   /* the next line frees the default space generated by the Create() */
1583   ierr = PetscFree(b->a);CHKERRQ(ierr);
1584   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1585   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1586   b->j    = uj;
1587   b->i    = ui;
1588   b->diag = 0;
1589   b->ilen = 0;
1590   b->imax = 0;
1591   b->row  = perm;
1592   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1593   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1594   b->icol = perm;
1595   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1596   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1597   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1598   b->maxnz = b->nz = ui[mbs];
1599 
1600   B->factor                 = FACTOR_CHOLESKY;
1601   B->info.factor_mallocs    = reallocs;
1602   B->info.fill_ratio_given  = fill;
1603   if (ai[mbs] != 0) {
1604     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
1605   } else {
1606     B->info.fill_ratio_needed = 0.0;
1607   }
1608   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1609   if (perm_identity){
1610     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1611     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1612   }
1613   PetscFunctionReturn(0);
1614 }
1615