xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision c3d102fe2dfc19c1832704c537829f04ceb136a2)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 #include "petscbt.h"
6 #include "src/mat/utils/freespace.h"
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
11 {
12   PetscFunctionBegin;
13 
14   SETERRQ(PETSC_ERR_SUP,"Code not written");
15 #if !defined(PETSC_USE_DEBUG)
16   PetscFunctionReturn(0);
17 #endif
18 }
19 
20 
21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
22 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
23 
24 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
25 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
26 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   PetscInt       *c,*r,*ic,i,n = A->m;
64   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65   PetscInt       *ordcol,*iwk,*iperm,*jw;
66   PetscInt       jmax,lfill,job,*o_i,*o_j;
67   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
68   PetscReal      af;
69 
70   PetscFunctionBegin;
71 
72   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
73   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
74   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
75   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
76   lfill   = (PetscInt)(info->dtcount/2.0);
77   jmax    = (PetscInt)(info->fill*a->nz);
78 
79 
80   /* ------------------------------------------------------------
81      If reorder=.TRUE., then the original matrix has to be
82      reordered to reflect the user selected ordering scheme, and
83      then de-reordered so it is in it's original format.
84      Because Saad's dperm() is NOT in place, we have to copy
85      the original matrix and allocate more storage. . .
86      ------------------------------------------------------------
87   */
88 
89   /* set reorder to true if either isrow or iscol is not identity */
90   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
91   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
92   reorder = PetscNot(reorder);
93 
94 
95   /* storage for ilu factor */
96   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
99   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
100 
101   /* ------------------------------------------------------------
102      Make sure that everything is Fortran formatted (1-Based)
103      ------------------------------------------------------------
104   */
105   for (i=old_i[0];i<old_i[n];i++) {
106     old_j[i]++;
107   }
108   for(i=0;i<n+1;i++) {
109     old_i[i]++;
110   };
111 
112 
113   if (reorder) {
114     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
115     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
116     for(i=0;i<n;i++) {
117       r[i]  = r[i]+1;
118       c[i]  = c[i]+1;
119     }
120     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
122     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
123     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124     for (i=0;i<n;i++) {
125       r[i]  = r[i]-1;
126       c[i]  = c[i]-1;
127     }
128     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
129     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
130     o_a = old_a2;
131     o_j = old_j2;
132     o_i = old_i2;
133   } else {
134     o_a = old_a;
135     o_j = old_j;
136     o_i = old_i;
137   }
138 
139   /* ------------------------------------------------------------
140      Call Saad's ilutp() routine to generate the factorization
141      ------------------------------------------------------------
142   */
143 
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
145   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
146   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
147 
148   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149   if (sierr) {
150     switch (sierr) {
151       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
152       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
153       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157     }
158   }
159 
160   ierr = PetscFree(w);CHKERRQ(ierr);
161   ierr = PetscFree(jw);CHKERRQ(ierr);
162 
163   /* ------------------------------------------------------------
164      Saad's routine gives the result in Modified Sparse Row (msr)
165      Convert to Compressed Sparse Row format (csr)
166      ------------------------------------------------------------
167   */
168 
169   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
170   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
171 
172   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
173 
174   ierr = PetscFree(iwk);CHKERRQ(ierr);
175   ierr = PetscFree(wk);CHKERRQ(ierr);
176 
177   if (reorder) {
178     ierr = PetscFree(old_a2);CHKERRQ(ierr);
179     ierr = PetscFree(old_j2);CHKERRQ(ierr);
180     ierr = PetscFree(old_i2);CHKERRQ(ierr);
181   } else {
182     /* fix permutation of old_j that the factorization introduced */
183     for (i=old_i[0]; i<old_i[n]; i++) {
184       old_j[i-1] = iperm[old_j[i-1]-1];
185     }
186   }
187 
188   /* get rid of the shift to indices starting at 1 */
189   for (i=0; i<n+1; i++) {
190     old_i[i]--;
191   }
192   for (i=old_i[0];i<old_i[n];i++) {
193     old_j[i]--;
194   }
195 
196   /* Make the factored matrix 0-based */
197   for (i=0; i<n+1; i++) {
198     new_i[i]--;
199   }
200   for (i=new_i[0];i<new_i[n];i++) {
201     new_j[i]--;
202   }
203 
204   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205   /*-- permute the right-hand-side and solution vectors           --*/
206   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
207   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
208   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
209   for(i=0; i<n; i++) {
210     ordcol[i] = ic[iperm[i]-1];
211   };
212   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
213   ierr = ISDestroy(isicol);CHKERRQ(ierr);
214 
215   ierr = PetscFree(iperm);CHKERRQ(ierr);
216 
217   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
218   ierr = PetscFree(ordcol);CHKERRQ(ierr);
219 
220   /*----- put together the new matrix -----*/
221 
222   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
223   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
224   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
225   (*fact)->factor    = FACTOR_LU;
226   (*fact)->assembled = PETSC_TRUE;
227 
228   b = (Mat_SeqAIJ*)(*fact)->data;
229   ierr = PetscFree(b->imax);CHKERRQ(ierr);
230   b->sorted        = PETSC_FALSE;
231   b->singlemalloc  = PETSC_FALSE;
232   /* the next line frees the default space generated by the MatCreate() */
233   ierr             = PetscFree(b->a);CHKERRQ(ierr);
234   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
235   b->a             = new_a;
236   b->j             = new_j;
237   b->i             = new_i;
238   b->ilen          = 0;
239   b->imax          = 0;
240   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241   b->row           = isirow;
242   b->col           = iscolf;
243   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
244   b->maxnz = b->nz = new_i[n];
245   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
246   (*fact)->info.factor_mallocs = 0;
247 
248   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
249 
250   /* check out for identical nodes. If found, use inode functions */
251   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
252 
253   af = ((double)b->nz)/((double)a->nz) + .001;
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
256   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
257   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
258 
259   PetscFunctionReturn(0);
260 #endif
261 }
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
265 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
266 {
267   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
268   IS             isicol;
269   PetscErrorCode ierr;
270   PetscInt       *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
271   PetscInt       *bi,*bj,*ajtmp;
272   PetscInt       *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
273   PetscReal      f;
274   PetscInt       nlnk,*lnk,k,*cols,**bi_ptr;
275   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
276   PetscBT        lnkbt;
277 
278   PetscFunctionBegin;
279   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
280   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
281   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
282   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
283 
284   /* get new row pointers */
285   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
286   bi[0] = 0;
287 
288   /* bdiag is location of diagonal in factor */
289   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
290   bdiag[0] = 0;
291 
292   /* linked list for storing column indices of the active row */
293   nlnk = n + 1;
294   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
295 
296   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr);
297   im     = cols + n;
298   bi_ptr = (PetscInt**)(im + n);
299 
300   /* initial FreeSpace size is f*(ai[n]+1) */
301   f = info->fill;
302   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
303   current_space = free_space;
304 
305   for (i=0; i<n; i++) {
306     /* copy previous fill into linked list */
307     nzi = 0;
308     nnz = ai[r[i]+1] - ai[r[i]];
309     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
310     ajtmp = aj + ai[r[i]];
311     for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
312     ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
313     nzi += nlnk;
314 
315     /* add pivot rows into linked list */
316     row = lnk[n];
317     while (row < i) {
318       nzbd    = bdiag[row] - bi[row] + 1;
319       ajtmp   = bi_ptr[row] + nzbd;
320       nnz     = im[row] - nzbd; /* num of columns with row<indices<=i */
321       im[row] = nzbd;
322       ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr);
323       nzi     += nlnk;
324       im[row] += nzbd;  /* update im[row]: num of cols with index<=i */
325 
326       row = lnk[row];
327     }
328 
329     bi[i+1] = bi[i] + nzi;
330     im[i]   = nzi;
331 
332     /* mark bdiag */
333     nzbd = 0;
334     nnz  = nzi;
335     k    = lnk[n];
336     while (nnz-- && k < i){
337       nzbd++;
338       k = lnk[k];
339     }
340     bdiag[i] = bi[i] + nzbd;
341 
342     /* if free space is not available, make more free space */
343     if (current_space->local_remaining<nzi) {
344       nnz = (n - i)*nzi; /* estimated and max additional space needed */
345       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
346       reallocs++;
347     }
348 
349     /* copy data into free space, then initialize lnk */
350     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
351     bi_ptr[i] = current_space->array;
352     current_space->array           += nzi;
353     current_space->local_used      += nzi;
354     current_space->local_remaining -= nzi;
355   }
356   if (ai[n] != 0) {
357     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
358     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
359     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
360     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
361     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
362   } else {
363     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
364   }
365 
366   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
367   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
368 
369   /* destroy list of free space and other temporary array(s) */
370   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
371   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
372   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
373   ierr = PetscFree(cols);CHKERRQ(ierr);
374 
375   /* put together the new matrix */
376   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
377   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
378   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
379   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
380   b    = (Mat_SeqAIJ*)(*B)->data;
381   ierr = PetscFree(b->imax);CHKERRQ(ierr);
382   b->singlemalloc = PETSC_FALSE;
383   /* the next line frees the default space generated by the Create() */
384   ierr = PetscFree(b->a);CHKERRQ(ierr);
385   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
386   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
387   b->j          = bj;
388   b->i          = bi;
389   b->diag       = bdiag;
390   b->ilen       = 0;
391   b->imax       = 0;
392   b->row        = isrow;
393   b->col        = iscol;
394   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
395   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
396   b->icol       = isicol;
397   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
398 
399   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
400   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
401   b->maxnz = b->nz = bi[n] ;
402 
403   (*B)->factor                 =  FACTOR_LU;
404   (*B)->info.factor_mallocs    = reallocs;
405   (*B)->info.fill_ratio_given  = f;
406   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
407   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
408 
409   if (ai[n] != 0) {
410     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
411   } else {
412     (*B)->info.fill_ratio_needed = 0.0;
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 /* ----------------------------------------------------------- */
418 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
422 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
423 {
424   Mat            C=*B;
425   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
426   IS             isrow = b->row,isicol = b->icol;
427   PetscErrorCode ierr;
428   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
429   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
430   PetscInt       *diag_offset = b->diag,diag,*pj;
431   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
432   PetscReal      zeropivot,rs,d,shift_nz;
433   PetscReal      row_shift,shift_top=0.;
434   PetscTruth     shift_pd;
435   LUShift_Ctx    sctx;
436   PetscInt       newshift;
437 
438   PetscFunctionBegin;
439   shift_nz  = info->shiftnz;
440   shift_pd  = info->shiftpd;
441   zeropivot = info->zeropivot;
442 
443   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
444   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
445   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
446   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
447   rtmps = rtmp; ics = ic;
448 
449   if (!a->diag) {
450     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
451   }
452   /* if both shift schemes are chosen by user, only use shift_pd */
453   if (shift_pd && shift_nz) shift_nz = 0.0;
454   if (shift_pd) { /* set shift_top=max{row_shift} */
455     PetscInt *aai = a->i,*ddiag = a->diag;
456     shift_top = 0;
457     for (i=0; i<n; i++) {
458       d = PetscAbsScalar((a->a)[ddiag[i]]);
459       /* calculate amt of shift needed for this row */
460       if (d<=0) {
461         row_shift = 0;
462       } else {
463         row_shift = -2*d;
464       }
465       v  = a->a+aai[i];
466       nz = aai[i+1] - aai[i];
467       for (j=0; j<nz; j++)
468 	row_shift += PetscAbsScalar(v[j]);
469       if (row_shift>shift_top) shift_top = row_shift;
470     }
471   }
472 
473   sctx.shift_amount = 0;
474   sctx.shift_top    = shift_top;
475   sctx.nshift       = 0;
476   sctx.nshift_max   = 5;
477   sctx.shift_lo     = 0.;
478   sctx.shift_hi     = 1.;
479   do {
480     sctx.lushift = PETSC_FALSE;
481     for (i=0; i<n; i++){
482       nz    = bi[i+1] - bi[i];
483       bjtmp = bj + bi[i];
484       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
485 
486       /* load in initial (unfactored row) */
487       nz    = a->i[r[i]+1] - a->i[r[i]];
488       ajtmp = a->j + a->i[r[i]];
489       v     = a->a + a->i[r[i]];
490       for (j=0; j<nz; j++) {
491         rtmp[ics[ajtmp[j]]] = v[j];
492       }
493       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
494 
495       row = *bjtmp++;
496       while  (row < i) {
497         pc = rtmp + row;
498         if (*pc != 0.0) {
499           pv         = b->a + diag_offset[row];
500           pj         = b->j + diag_offset[row] + 1;
501           multiplier = *pc / *pv++;
502           *pc        = multiplier;
503           nz         = bi[row+1] - diag_offset[row] - 1;
504           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
505           PetscLogFlops(2*nz);
506         }
507         row = *bjtmp++;
508       }
509       /* finished row so stick it into b->a */
510       pv   = b->a + bi[i] ;
511       pj   = b->j + bi[i] ;
512       nz   = bi[i+1] - bi[i];
513       diag = diag_offset[i] - bi[i];
514       rs   = 0.0;
515       for (j=0; j<nz; j++) {
516         pv[j] = rtmps[pj[j]];
517         if (j != diag) rs += PetscAbsScalar(pv[j]);
518       }
519 
520       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
521       sctx.rs  = rs;
522       sctx.pv  = pv[diag];
523       ierr = Mat_LUCheckShift(info,&sctx,&newshift);CHKERRQ(ierr);
524       if (newshift == 1){
525         break;    /* sctx.shift_amount is updated */
526       } else if (newshift == -1){
527         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
528       }
529     }
530 
531     if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
532       /*
533        * if no shift in this attempt & shifting & started shifting & can refine,
534        * then try lower shift
535        */
536       sctx.shift_hi       = info->shift_fraction;
537       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
538       sctx.shift_amount   = info->shift_fraction * sctx.shift_top;
539       sctx.lushift        = PETSC_TRUE;
540       sctx.nshift++;
541     }
542   } while (sctx.lushift);
543 
544   /* invert diagonal entries for simplier triangular solves */
545   for (i=0; i<n; i++) {
546     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
547   }
548 
549   ierr = PetscFree(rtmp);CHKERRQ(ierr);
550   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
551   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
552   C->factor = FACTOR_LU;
553   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
554   C->assembled = PETSC_TRUE;
555   PetscLogFlops(C->n);
556   if (sctx.nshift){
557     if (shift_nz) {
558       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
559     } else if (shift_pd) {
560       b->lu_shift_fraction = info->shift_fraction;
561       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift);
562     }
563   }
564   PetscFunctionReturn(0);
565 }
566 
567 #undef __FUNCT__
568 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
569 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
570 {
571   PetscFunctionBegin;
572   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
573   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
574   PetscFunctionReturn(0);
575 }
576 
577 
578 /* ----------------------------------------------------------- */
579 #undef __FUNCT__
580 #define __FUNCT__ "MatLUFactor_SeqAIJ"
581 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
582 {
583   PetscErrorCode ierr;
584   Mat            C;
585 
586   PetscFunctionBegin;
587   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
588   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
589   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
590   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
591   PetscFunctionReturn(0);
592 }
593 /* ----------------------------------------------------------- */
594 #undef __FUNCT__
595 #define __FUNCT__ "MatSolve_SeqAIJ"
596 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
597 {
598   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
599   IS             iscol = a->col,isrow = a->row;
600   PetscErrorCode ierr;
601   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
602   PetscInt       nz,*rout,*cout;
603   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
604 
605   PetscFunctionBegin;
606   if (!n) PetscFunctionReturn(0);
607 
608   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
609   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
610   tmp  = a->solve_work;
611 
612   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
613   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
614 
615   /* forward solve the lower triangular */
616   tmp[0] = b[*r++];
617   tmps   = tmp;
618   for (i=1; i<n; i++) {
619     v   = aa + ai[i] ;
620     vi  = aj + ai[i] ;
621     nz  = a->diag[i] - ai[i];
622     sum = b[*r++];
623     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
624     tmp[i] = sum;
625   }
626 
627   /* backward solve the upper triangular */
628   for (i=n-1; i>=0; i--){
629     v   = aa + a->diag[i] + 1;
630     vi  = aj + a->diag[i] + 1;
631     nz  = ai[i+1] - a->diag[i] - 1;
632     sum = tmp[i];
633     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
634     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
635   }
636 
637   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
638   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
639   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
640   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
641   PetscLogFlops(2*a->nz - A->n);
642   PetscFunctionReturn(0);
643 }
644 
645 /* ----------------------------------------------------------- */
646 #undef __FUNCT__
647 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
648 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
649 {
650   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
651   PetscErrorCode ierr;
652   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
653   PetscScalar    *x,*b,*aa = a->a;
654 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
655   PetscInt       adiag_i,i,*vi,nz,ai_i;
656   PetscScalar    *v,sum;
657 #endif
658 
659   PetscFunctionBegin;
660   if (!n) PetscFunctionReturn(0);
661 
662   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
663   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
664 
665 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
666   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
667 #else
668   /* forward solve the lower triangular */
669   x[0] = b[0];
670   for (i=1; i<n; i++) {
671     ai_i = ai[i];
672     v    = aa + ai_i;
673     vi   = aj + ai_i;
674     nz   = adiag[i] - ai_i;
675     sum  = b[i];
676     while (nz--) sum -= *v++ * x[*vi++];
677     x[i] = sum;
678   }
679 
680   /* backward solve the upper triangular */
681   for (i=n-1; i>=0; i--){
682     adiag_i = adiag[i];
683     v       = aa + adiag_i + 1;
684     vi      = aj + adiag_i + 1;
685     nz      = ai[i+1] - adiag_i - 1;
686     sum     = x[i];
687     while (nz--) sum -= *v++ * x[*vi++];
688     x[i]    = sum*aa[adiag_i];
689   }
690 #endif
691   PetscLogFlops(2*a->nz - A->n);
692   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
693   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNCT__
698 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
699 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
700 {
701   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
702   IS             iscol = a->col,isrow = a->row;
703   PetscErrorCode ierr;
704   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
705   PetscInt       nz,*rout,*cout;
706   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
707 
708   PetscFunctionBegin;
709   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
710 
711   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
712   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
713   tmp  = a->solve_work;
714 
715   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
716   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
717 
718   /* forward solve the lower triangular */
719   tmp[0] = b[*r++];
720   for (i=1; i<n; i++) {
721     v   = aa + ai[i] ;
722     vi  = aj + ai[i] ;
723     nz  = a->diag[i] - ai[i];
724     sum = b[*r++];
725     while (nz--) sum -= *v++ * tmp[*vi++ ];
726     tmp[i] = sum;
727   }
728 
729   /* backward solve the upper triangular */
730   for (i=n-1; i>=0; i--){
731     v   = aa + a->diag[i] + 1;
732     vi  = aj + a->diag[i] + 1;
733     nz  = ai[i+1] - a->diag[i] - 1;
734     sum = tmp[i];
735     while (nz--) sum -= *v++ * tmp[*vi++ ];
736     tmp[i] = sum*aa[a->diag[i]];
737     x[*c--] += tmp[i];
738   }
739 
740   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
741   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
742   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
743   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
744   PetscLogFlops(2*a->nz);
745 
746   PetscFunctionReturn(0);
747 }
748 /* -------------------------------------------------------------------*/
749 #undef __FUNCT__
750 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
751 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
752 {
753   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
754   IS             iscol = a->col,isrow = a->row;
755   PetscErrorCode ierr;
756   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
757   PetscInt       nz,*rout,*cout,*diag = a->diag;
758   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
759 
760   PetscFunctionBegin;
761   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
762   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
763   tmp  = a->solve_work;
764 
765   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
766   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
767 
768   /* copy the b into temp work space according to permutation */
769   for (i=0; i<n; i++) tmp[i] = b[c[i]];
770 
771   /* forward solve the U^T */
772   for (i=0; i<n; i++) {
773     v   = aa + diag[i] ;
774     vi  = aj + diag[i] + 1;
775     nz  = ai[i+1] - diag[i] - 1;
776     s1  = tmp[i];
777     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
778     while (nz--) {
779       tmp[*vi++ ] -= (*v++)*s1;
780     }
781     tmp[i] = s1;
782   }
783 
784   /* backward solve the L^T */
785   for (i=n-1; i>=0; i--){
786     v   = aa + diag[i] - 1 ;
787     vi  = aj + diag[i] - 1 ;
788     nz  = diag[i] - ai[i];
789     s1  = tmp[i];
790     while (nz--) {
791       tmp[*vi-- ] -= (*v--)*s1;
792     }
793   }
794 
795   /* copy tmp into x according to permutation */
796   for (i=0; i<n; i++) x[r[i]] = tmp[i];
797 
798   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
799   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
800   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
801   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
802 
803   PetscLogFlops(2*a->nz-A->n);
804   PetscFunctionReturn(0);
805 }
806 
807 #undef __FUNCT__
808 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
809 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
810 {
811   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
812   IS             iscol = a->col,isrow = a->row;
813   PetscErrorCode ierr;
814   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
815   PetscInt       nz,*rout,*cout,*diag = a->diag;
816   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
817 
818   PetscFunctionBegin;
819   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
820 
821   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
822   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
823   tmp = a->solve_work;
824 
825   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
826   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
827 
828   /* copy the b into temp work space according to permutation */
829   for (i=0; i<n; i++) tmp[i] = b[c[i]];
830 
831   /* forward solve the U^T */
832   for (i=0; i<n; i++) {
833     v   = aa + diag[i] ;
834     vi  = aj + diag[i] + 1;
835     nz  = ai[i+1] - diag[i] - 1;
836     tmp[i] *= *v++;
837     while (nz--) {
838       tmp[*vi++ ] -= (*v++)*tmp[i];
839     }
840   }
841 
842   /* backward solve the L^T */
843   for (i=n-1; i>=0; i--){
844     v   = aa + diag[i] - 1 ;
845     vi  = aj + diag[i] - 1 ;
846     nz  = diag[i] - ai[i];
847     while (nz--) {
848       tmp[*vi-- ] -= (*v--)*tmp[i];
849     }
850   }
851 
852   /* copy tmp into x according to permutation */
853   for (i=0; i<n; i++) x[r[i]] += tmp[i];
854 
855   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
856   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
857   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
858   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
859 
860   PetscLogFlops(2*a->nz);
861   PetscFunctionReturn(0);
862 }
863 /* ----------------------------------------------------------------*/
864 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
868 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
869 {
870   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
871   IS             isicol;
872   PetscErrorCode ierr;
873   PetscInt       *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
874   PetscInt       *bi,*cols,nnz,*cols_lvl;
875   PetscInt       *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
876   PetscInt       i,levels,diagonal_fill;
877   PetscTruth     col_identity,row_identity;
878   PetscReal      f;
879   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
880   PetscBT        lnkbt;
881   PetscInt       nzi,*bj,**bj_ptr,**bjlvl_ptr;
882   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
883   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
884 
885   PetscFunctionBegin;
886   f             = info->fill;
887   levels        = (PetscInt)info->levels;
888   diagonal_fill = (PetscInt)info->diagonal_fill;
889   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
890 
891   /* special case that simply copies fill pattern */
892   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
893   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
894   if (!levels && row_identity && col_identity) {
895     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
896     (*fact)->factor = FACTOR_LU;
897     b               = (Mat_SeqAIJ*)(*fact)->data;
898     if (!b->diag) {
899       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
900     }
901     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
902     b->row              = isrow;
903     b->col              = iscol;
904     b->icol             = isicol;
905     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
906     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
907     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
908     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
909     PetscFunctionReturn(0);
910   }
911 
912   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
913   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
914 
915   /* get new row pointers */
916   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
917   bi[0] = 0;
918   /* bdiag is location of diagonal in factor */
919   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
920   bdiag[0]  = 0;
921 
922   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
923   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
924 
925   /* create a linked list for storing column indices of the active row */
926   nlnk = n + 1;
927   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
928 
929   /* initial FreeSpace size is f*(ai[n]+1) */
930   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
931   current_space = free_space;
932   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
933   current_space_lvl = free_space_lvl;
934 
935   for (i=0; i<n; i++) {
936     nzi = 0;
937     /* copy current row into linked list */
938     nnz  = ai[r[i]+1] - ai[r[i]];
939     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
940     cols = aj + ai[r[i]];
941     lnk[i] = -1; /* marker to indicate if diagonal exists */
942     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
943     nzi += nlnk;
944 
945     /* make sure diagonal entry is included */
946     if (diagonal_fill && lnk[i] == -1) {
947       fm = n;
948       while (lnk[fm] < i) fm = lnk[fm];
949       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
950       lnk[fm]    = i;
951       lnk_lvl[i] = 0;
952       nzi++; dcount++;
953     }
954 
955     /* add pivot rows into the active row */
956     nzbd = 0;
957     prow = lnk[n];
958     while (prow < i) {
959       nnz      = bdiag[prow];
960       cols     = bj_ptr[prow] + nnz + 1;
961       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
962       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
963       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
964       nzi += nlnk;
965       prow = lnk[prow];
966       nzbd++;
967     }
968     bdiag[i] = nzbd;
969     bi[i+1]  = bi[i] + nzi;
970 
971     /* if free space is not available, make more free space */
972     if (current_space->local_remaining<nzi) {
973       nnz = nzi*(n - i); /* estimated and max additional space needed */
974       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
975       ierr = GetMoreSpace(nnz,&current_space_lvl);CHKERRQ(ierr);
976       reallocs++;
977     }
978 
979     /* copy data into free_space and free_space_lvl, then initialize lnk */
980     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
981     bj_ptr[i]    = current_space->array;
982     bjlvl_ptr[i] = current_space_lvl->array;
983 
984     /* make sure the active row i has diagonal entry */
985     if (*(bj_ptr[i]+bdiag[i]) != i) {
986       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
987     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
988     }
989 
990     current_space->array           += nzi;
991     current_space->local_used      += nzi;
992     current_space->local_remaining -= nzi;
993     current_space_lvl->array           += nzi;
994     current_space_lvl->local_used      += nzi;
995     current_space_lvl->local_remaining -= nzi;
996   }
997 
998   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
999   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1000 
1001   /* destroy list of free space and other temporary arrays */
1002   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1003   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1004   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1005   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1006   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1007 
1008   {
1009     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1010     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1011     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1012     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1013     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1014     if (diagonal_fill) {
1015       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1016     }
1017   }
1018 
1019   /* put together the new matrix */
1020   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1021   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1022   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1023   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1024   b = (Mat_SeqAIJ*)(*fact)->data;
1025   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1026   b->singlemalloc = PETSC_FALSE;
1027   len = (bi[n] )*sizeof(PetscScalar);
1028   /* the next line frees the default space generated by the Create() */
1029   ierr = PetscFree(b->a);CHKERRQ(ierr);
1030   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1031   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1032   b->j          = bj;
1033   b->i          = bi;
1034   for (i=0; i<n; i++) bdiag[i] += bi[i];
1035   b->diag       = bdiag;
1036   b->ilen       = 0;
1037   b->imax       = 0;
1038   b->row        = isrow;
1039   b->col        = iscol;
1040   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1041   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1042   b->icol       = isicol;
1043   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1044   /* In b structure:  Free imax, ilen, old a, old j.
1045      Allocate bdiag, solve_work, new a, new j */
1046   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1047   b->maxnz             = b->nz = bi[n] ;
1048   (*fact)->factor = FACTOR_LU;
1049   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1050   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1051   (*fact)->info.factor_mallocs    = reallocs;
1052   (*fact)->info.fill_ratio_given  = f;
1053   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #include "src/mat/impls/sbaij/seq/sbaij.h"
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1060 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1061 {
1062   Mat            C = *B;
1063   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1064   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1065   IS             ip=b->row;
1066   PetscErrorCode ierr;
1067   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1068   PetscInt       *ai=a->i,*aj=a->j;
1069   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1070   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1071   PetscReal      zeropivot,rs,shiftnz;
1072   PetscTruth     shiftpd;
1073   ChShift_Ctx    sctx;
1074   PetscInt       newshift;
1075 
1076   PetscFunctionBegin;
1077   shiftnz   = info->shiftnz;
1078   shiftpd   = info->shiftpd;
1079   zeropivot = info->zeropivot;
1080 
1081   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1082 
1083   /* initialization */
1084   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1085   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1086   jl   = il + mbs;
1087   rtmp = (MatScalar*)(jl + mbs);
1088 
1089   sctx.shift_amount = 0;
1090   sctx.nshift       = 0;
1091   do {
1092     sctx.chshift = PETSC_FALSE;
1093     for (i=0; i<mbs; i++) {
1094       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1095     }
1096 
1097     for (k = 0; k<mbs; k++){
1098       bval = ba + bi[k];
1099       /* initialize k-th row by the perm[k]-th row of A */
1100       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1101       for (j = jmin; j < jmax; j++){
1102         col = rip[aj[j]];
1103         if (col >= k){ /* only take upper triangular entry */
1104           rtmp[col] = aa[j];
1105           *bval++  = 0.0; /* for in-place factorization */
1106         }
1107       }
1108       /* shift the diagonal of the matrix */
1109       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1110 
1111       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1112       dk = rtmp[k];
1113       i = jl[k]; /* first row to be added to k_th row  */
1114 
1115       while (i < k){
1116         nexti = jl[i]; /* next row to be added to k_th row */
1117 
1118         /* compute multiplier, update diag(k) and U(i,k) */
1119         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1120         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1121         dk += uikdi*ba[ili];
1122         ba[ili] = uikdi; /* -U(i,k) */
1123 
1124         /* add multiple of row i to k-th row */
1125         jmin = ili + 1; jmax = bi[i+1];
1126         if (jmin < jmax){
1127           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1128           /* update il and jl for row i */
1129           il[i] = jmin;
1130           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1131         }
1132         i = nexti;
1133       }
1134 
1135       /* shift the diagonals when zero pivot is detected */
1136       /* compute rs=sum of abs(off-diagonal) */
1137       rs   = 0.0;
1138       jmin = bi[k]+1;
1139       nz   = bi[k+1] - jmin;
1140       if (nz){
1141         bcol = bj + jmin;
1142         while (nz--){
1143           rs += PetscAbsScalar(rtmp[*bcol]);
1144           bcol++;
1145         }
1146       }
1147 
1148       sctx.rs = rs;
1149       sctx.pv = dk;
1150       ierr = Mat_CholeskyCheckShift(info,&sctx,&newshift);CHKERRQ(ierr);
1151       if (newshift == 1){
1152         break;    /* sctx.shift_amount is updated */
1153       } else if (newshift == -1){
1154         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1155       }
1156 
1157       /* copy data into U(k,:) */
1158       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1159       jmin = bi[k]+1; jmax = bi[k+1];
1160       if (jmin < jmax) {
1161         for (j=jmin; j<jmax; j++){
1162           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1163         }
1164         /* add the k-th row into il and jl */
1165         il[k] = jmin;
1166         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1167       }
1168     }
1169   } while (sctx.chshift);
1170   ierr = PetscFree(il);CHKERRQ(ierr);
1171 
1172   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1173   C->factor       = FACTOR_CHOLESKY;
1174   C->assembled    = PETSC_TRUE;
1175   C->preallocated = PETSC_TRUE;
1176   PetscLogFlops(C->m);
1177   if (sctx.nshift){
1178     if (shiftnz) {
1179       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1180     } else if (shiftpd) {
1181       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1182     }
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1189 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1190 {
1191   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1192   Mat_SeqSBAIJ   *b;
1193   Mat            B;
1194   PetscErrorCode ierr;
1195   PetscTruth     perm_identity;
1196   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1197   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1198   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1199   PetscInt       ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1200   PetscReal      fill=info->fill,levels=info->levels;
1201   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1202   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1203   PetscBT        lnkbt;
1204 
1205   PetscFunctionBegin;
1206   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1207   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1208 
1209   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1210   ui[0] = 0;
1211 
1212   /* special case that simply copies fill pattern */
1213   if (!levels && perm_identity) {
1214     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1215     for (i=0; i<am; i++) {
1216       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1217     }
1218     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1219     cols = uj;
1220     for (i=0; i<am; i++) {
1221       aj    = a->j + a->diag[i];
1222       ncols = ui[i+1] - ui[i];
1223       for (j=0; j<ncols; j++) *cols++ = *aj++;
1224     }
1225   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1226     /* initialization */
1227     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1228 
1229     /* jl: linked list for storing indices of the pivot rows
1230        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1231     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1232     il         = jl + am;
1233     uj_ptr     = (PetscInt**)(il + am);
1234     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1235     for (i=0; i<am; i++){
1236       jl[i] = am; il[i] = 0;
1237     }
1238 
1239     /* create and initialize a linked list for storing column indices of the active row k */
1240     nlnk = am + 1;
1241     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1242 
1243     /* initial FreeSpace size is fill*(ai[am]+1) */
1244     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1245     current_space = free_space;
1246     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1247     current_space_lvl = free_space_lvl;
1248 
1249     for (k=0; k<am; k++){  /* for each active row k */
1250       /* initialize lnk by the column indices of row rip[k] of A */
1251       nzk   = 0;
1252       ncols = ai[rip[k]+1] - ai[rip[k]];
1253       ncols_upper = 0;
1254       for (j=0; j<ncols; j++){
1255         i = *(aj + ai[rip[k]] + j);
1256         if (rip[i] >= k){ /* only take upper triangular entry */
1257           ajtmp[ncols_upper] = i;
1258           ncols_upper++;
1259         }
1260       }
1261       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1262       nzk += nlnk;
1263 
1264       /* update lnk by computing fill-in for each pivot row to be merged in */
1265       prow = jl[k]; /* 1st pivot row */
1266 
1267       while (prow < k){
1268         nextprow = jl[prow];
1269 
1270         /* merge prow into k-th row */
1271         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1272         jmax = ui[prow+1];
1273         ncols = jmax-jmin;
1274         i     = jmin - ui[prow];
1275         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1276         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1277         j     = *(uj - 1);
1278         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1279         nzk += nlnk;
1280 
1281         /* update il and jl for prow */
1282         if (jmin < jmax){
1283           il[prow] = jmin;
1284           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1285         }
1286         prow = nextprow;
1287       }
1288 
1289       /* if free space is not available, make more free space */
1290       if (current_space->local_remaining<nzk) {
1291         i = am - k + 1; /* num of unfactored rows */
1292         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1293         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1294         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1295         reallocs++;
1296       }
1297 
1298       /* copy data into free_space and free_space_lvl, then initialize lnk */
1299       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1300 
1301       /* add the k-th row into il and jl */
1302       if (nzk > 1){
1303         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1304         jl[k] = jl[i]; jl[i] = k;
1305         il[k] = ui[k] + 1;
1306       }
1307       uj_ptr[k]     = current_space->array;
1308       uj_lvl_ptr[k] = current_space_lvl->array;
1309 
1310       current_space->array           += nzk;
1311       current_space->local_used      += nzk;
1312       current_space->local_remaining -= nzk;
1313 
1314       current_space_lvl->array           += nzk;
1315       current_space_lvl->local_used      += nzk;
1316       current_space_lvl->local_remaining -= nzk;
1317 
1318       ui[k+1] = ui[k] + nzk;
1319     }
1320 
1321     if (ai[am] != 0) {
1322       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1323       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1324       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1325       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1326     } else {
1327       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1328     }
1329 
1330     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1331     ierr = PetscFree(jl);CHKERRQ(ierr);
1332     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1333 
1334     /* destroy list of free space and other temporary array(s) */
1335     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1336     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1337     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1338     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1339 
1340   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1341 
1342   /* put together the new matrix in MATSEQSBAIJ format */
1343   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1344   B = *fact;
1345   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1346   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1347 
1348   b    = (Mat_SeqSBAIJ*)B->data;
1349   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1350   b->singlemalloc = PETSC_FALSE;
1351   /* the next line frees the default space generated by the Create() */
1352   ierr = PetscFree(b->a);CHKERRQ(ierr);
1353   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1354   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1355   b->j    = uj;
1356   b->i    = ui;
1357   b->diag = 0;
1358   b->ilen = 0;
1359   b->imax = 0;
1360   b->row  = perm;
1361   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1362   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1363   b->icol = perm;
1364   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1365   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1366   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1367   b->maxnz = b->nz = ui[am];
1368 
1369   B->factor                 = FACTOR_CHOLESKY;
1370   B->info.factor_mallocs    = reallocs;
1371   B->info.fill_ratio_given  = fill;
1372   if (ai[am] != 0) {
1373     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1374   } else {
1375     B->info.fill_ratio_needed = 0.0;
1376   }
1377   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1378   if (perm_identity){
1379     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1380     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1381   }
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1387 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1388 {
1389   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1390   Mat_SeqSBAIJ   *b;
1391   Mat            B;
1392   PetscErrorCode ierr;
1393   PetscTruth     perm_identity;
1394   PetscReal      fill = info->fill;
1395   PetscInt       *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1396   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1397   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1398   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1399   PetscBT        lnkbt;
1400   IS             iperm;
1401 
1402   PetscFunctionBegin;
1403   /* check whether perm is the identity mapping */
1404   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1405   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1406 
1407   if (!perm_identity){
1408     /* check if perm is symmetric! */
1409     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1410     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1411     for (i=0; i<am; i++) {
1412       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1413     }
1414     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1415     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1416   }
1417 
1418   /* initialization */
1419   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1420   ui[0] = 0;
1421 
1422   /* jl: linked list for storing indices of the pivot rows
1423      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1424   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1425   il     = jl + am;
1426   cols   = il + am;
1427   ui_ptr = (PetscInt**)(cols + am);
1428   for (i=0; i<am; i++){
1429     jl[i] = am; il[i] = 0;
1430   }
1431 
1432   /* create and initialize a linked list for storing column indices of the active row k */
1433   nlnk = am + 1;
1434   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1435 
1436   /* initial FreeSpace size is fill*(ai[am]+1) */
1437   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1438   current_space = free_space;
1439 
1440   for (k=0; k<am; k++){  /* for each active row k */
1441     /* initialize lnk by the column indices of row rip[k] of A */
1442     nzk   = 0;
1443     ncols = ai[rip[k]+1] - ai[rip[k]];
1444     ncols_upper = 0;
1445     for (j=0; j<ncols; j++){
1446       i = rip[*(aj + ai[rip[k]] + j)];
1447       if (i >= k){ /* only take upper triangular entry */
1448         cols[ncols_upper] = i;
1449         ncols_upper++;
1450       }
1451     }
1452     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1453     nzk += nlnk;
1454 
1455     /* update lnk by computing fill-in for each pivot row to be merged in */
1456     prow = jl[k]; /* 1st pivot row */
1457 
1458     while (prow < k){
1459       nextprow = jl[prow];
1460       /* merge prow into k-th row */
1461       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1462       jmax = ui[prow+1];
1463       ncols = jmax-jmin;
1464       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1465       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1466       nzk += nlnk;
1467 
1468       /* update il and jl for prow */
1469       if (jmin < jmax){
1470         il[prow] = jmin;
1471         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1472       }
1473       prow = nextprow;
1474     }
1475 
1476     /* if free space is not available, make more free space */
1477     if (current_space->local_remaining<nzk) {
1478       i = am - k + 1; /* num of unfactored rows */
1479       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1480       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1481       reallocs++;
1482     }
1483 
1484     /* copy data into free space, then initialize lnk */
1485     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1486 
1487     /* add the k-th row into il and jl */
1488     if (nzk-1 > 0){
1489       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1490       jl[k] = jl[i]; jl[i] = k;
1491       il[k] = ui[k] + 1;
1492     }
1493     ui_ptr[k] = current_space->array;
1494     current_space->array           += nzk;
1495     current_space->local_used      += nzk;
1496     current_space->local_remaining -= nzk;
1497 
1498     ui[k+1] = ui[k] + nzk;
1499   }
1500 
1501   if (ai[am] != 0) {
1502     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1503     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1504     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1505     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1506   } else {
1507      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1508   }
1509 
1510   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1511   ierr = PetscFree(jl);CHKERRQ(ierr);
1512 
1513   /* destroy list of free space and other temporary array(s) */
1514   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1515   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1516   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1517 
1518   /* put together the new matrix in MATSEQSBAIJ format */
1519   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1520   B    = *fact;
1521   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1522   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1523 
1524   b = (Mat_SeqSBAIJ*)B->data;
1525   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1526   b->singlemalloc = PETSC_FALSE;
1527   /* the next line frees the default space generated by the Create() */
1528   ierr = PetscFree(b->a);CHKERRQ(ierr);
1529   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1530   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1531   b->j    = uj;
1532   b->i    = ui;
1533   b->diag = 0;
1534   b->ilen = 0;
1535   b->imax = 0;
1536   b->row  = perm;
1537   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1538   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1539   b->icol = perm;
1540   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1541   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1542   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1543   b->maxnz = b->nz = ui[am];
1544 
1545   B->factor                 = FACTOR_CHOLESKY;
1546   B->info.factor_mallocs    = reallocs;
1547   B->info.fill_ratio_given  = fill;
1548   if (ai[am] != 0) {
1549     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1550   } else {
1551     B->info.fill_ratio_needed = 0.0;
1552   }
1553   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1554   if (perm_identity){
1555     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1556     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1557   }
1558   PetscFunctionReturn(0);
1559 }
1560