xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision e2df7a95c5ea77c899beea10ff9effd6061e7c8f)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
23 
24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
28 #endif
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
32   /* ------------------------------------------------------------
33 
34           This interface was contribed by Tony Caola
35 
36      This routine is an interface to the pivoting drop-tolerance
37      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
38      SPARSEKIT2.
39 
40      The SPARSEKIT2 routines used here are covered by the GNU
41      copyright; see the file gnu in this directory.
42 
43      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
44      help in getting this routine ironed out.
45 
46      The major drawback to this routine is that if info->fill is
47      not large enough it fails rather than allocating more space;
48      this can be fixed by hacking/improving the f2c version of
49      Yousef Saad's code.
50 
51      ------------------------------------------------------------
52 */
53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
54 {
55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
56   PetscFunctionBegin;
57   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
58   You can obtain the drop tolerance routines by installing PETSc from\n\
59   www.mcs.anl.gov/petsc\n");
60 #else
61   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
62   IS             iscolf,isicol,isirow;
63   PetscTruth     reorder;
64   PetscErrorCode ierr,sierr;
65   PetscInt       *c,*r,*ic,i,n = A->m;
66   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
67   PetscInt       *ordcol,*iwk,*iperm,*jw;
68   PetscInt       jmax,lfill,job,*o_i,*o_j;
69   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
70   PetscReal      af;
71 
72   PetscFunctionBegin;
73 
74   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
75   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
76   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
77   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
78   lfill   = (PetscInt)(info->dtcount/2.0);
79   jmax    = (PetscInt)(info->fill*a->nz);
80 
81 
82   /* ------------------------------------------------------------
83      If reorder=.TRUE., then the original matrix has to be
84      reordered to reflect the user selected ordering scheme, and
85      then de-reordered so it is in it's original format.
86      Because Saad's dperm() is NOT in place, we have to copy
87      the original matrix and allocate more storage. . .
88      ------------------------------------------------------------
89   */
90 
91   /* set reorder to true if either isrow or iscol is not identity */
92   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
93   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
94   reorder = PetscNot(reorder);
95 
96 
97   /* storage for ilu factor */
98   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
100   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
101   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
102 
103   /* ------------------------------------------------------------
104      Make sure that everything is Fortran formatted (1-Based)
105      ------------------------------------------------------------
106   */
107   for (i=old_i[0];i<old_i[n];i++) {
108     old_j[i]++;
109   }
110   for(i=0;i<n+1;i++) {
111     old_i[i]++;
112   };
113 
114 
115   if (reorder) {
116     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
117     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       r[i]  = r[i]+1;
120       c[i]  = c[i]+1;
121     }
122     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
123     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
124     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
125     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
126     for (i=0;i<n;i++) {
127       r[i]  = r[i]-1;
128       c[i]  = c[i]-1;
129     }
130     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
131     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
132     o_a = old_a2;
133     o_j = old_j2;
134     o_i = old_i2;
135   } else {
136     o_a = old_a;
137     o_j = old_j;
138     o_i = old_i;
139   }
140 
141   /* ------------------------------------------------------------
142      Call Saad's ilutp() routine to generate the factorization
143      ------------------------------------------------------------
144   */
145 
146   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
147   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
148   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
149 
150   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
151   if (sierr) {
152     switch (sierr) {
153       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
154       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
155       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
156       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
157       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
158       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
159     }
160   }
161 
162   ierr = PetscFree(w);CHKERRQ(ierr);
163   ierr = PetscFree(jw);CHKERRQ(ierr);
164 
165   /* ------------------------------------------------------------
166      Saad's routine gives the result in Modified Sparse Row (msr)
167      Convert to Compressed Sparse Row format (csr)
168      ------------------------------------------------------------
169   */
170 
171   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
172   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
173 
174   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
175 
176   ierr = PetscFree(iwk);CHKERRQ(ierr);
177   ierr = PetscFree(wk);CHKERRQ(ierr);
178 
179   if (reorder) {
180     ierr = PetscFree(old_a2);CHKERRQ(ierr);
181     ierr = PetscFree(old_j2);CHKERRQ(ierr);
182     ierr = PetscFree(old_i2);CHKERRQ(ierr);
183   } else {
184     /* fix permutation of old_j that the factorization introduced */
185     for (i=old_i[0]; i<old_i[n]; i++) {
186       old_j[i-1] = iperm[old_j[i-1]-1];
187     }
188   }
189 
190   /* get rid of the shift to indices starting at 1 */
191   for (i=0; i<n+1; i++) {
192     old_i[i]--;
193   }
194   for (i=old_i[0];i<old_i[n];i++) {
195     old_j[i]--;
196   }
197 
198   /* Make the factored matrix 0-based */
199   for (i=0; i<n+1; i++) {
200     new_i[i]--;
201   }
202   for (i=new_i[0];i<new_i[n];i++) {
203     new_j[i]--;
204   }
205 
206   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
207   /*-- permute the right-hand-side and solution vectors           --*/
208   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
209   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
210   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
211   for(i=0; i<n; i++) {
212     ordcol[i] = ic[iperm[i]-1];
213   };
214   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
215   ierr = ISDestroy(isicol);CHKERRQ(ierr);
216 
217   ierr = PetscFree(iperm);CHKERRQ(ierr);
218 
219   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
220   ierr = PetscFree(ordcol);CHKERRQ(ierr);
221 
222   /*----- put together the new matrix -----*/
223 
224   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
225   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
226   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
227   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
228   (*fact)->factor    = FACTOR_LU;
229   (*fact)->assembled = PETSC_TRUE;
230 
231   b = (Mat_SeqAIJ*)(*fact)->data;
232   b->freedata      = PETSC_TRUE;
233   b->sorted        = PETSC_FALSE;
234   b->singlemalloc  = PETSC_FALSE;
235   b->a             = new_a;
236   b->j             = new_j;
237   b->i             = new_i;
238   b->ilen          = 0;
239   b->imax          = 0;
240   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241   b->row           = isirow;
242   b->col           = iscolf;
243   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
244   b->maxnz = b->nz = new_i[n];
245   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
246   (*fact)->info.factor_mallocs = 0;
247 
248   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
249 
250   af = ((double)b->nz)/((double)a->nz) + .001;
251   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr);
252   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr);
253   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr);
254   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
255 
256   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
257 
258   PetscFunctionReturn(0);
259 #endif
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
265 {
266   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
267   IS             isicol;
268   PetscErrorCode ierr;
269   PetscInt       *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
270   PetscInt       *bi,*bj,*ajtmp;
271   PetscInt       *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
272   PetscReal      f;
273   PetscInt       nlnk,*lnk,k,*cols,**bi_ptr;
274   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
275   PetscBT        lnkbt;
276 
277   PetscFunctionBegin;
278   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
280   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
281   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
282 
283   /* get new row pointers */
284   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
285   bi[0] = 0;
286 
287   /* bdiag is location of diagonal in factor */
288   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
289   bdiag[0] = 0;
290 
291   /* linked list for storing column indices of the active row */
292   nlnk = n + 1;
293   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
294 
295   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr);
296   im     = cols + n;
297   bi_ptr = (PetscInt**)(im + n);
298 
299   /* initial FreeSpace size is f*(ai[n]+1) */
300   f = info->fill;
301   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
302   current_space = free_space;
303 
304   for (i=0; i<n; i++) {
305     /* copy previous fill into linked list */
306     nzi = 0;
307     nnz = ai[r[i]+1] - ai[r[i]];
308     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
309     ajtmp = aj + ai[r[i]];
310     for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
311     ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
312     nzi += nlnk;
313 
314     /* add pivot rows into linked list */
315     row = lnk[n];
316     while (row < i) {
317       nzbd    = bdiag[row] - bi[row] + 1;
318       ajtmp   = bi_ptr[row] + nzbd;
319       nnz     = im[row] - nzbd; /* num of columns with row<indices<=i */
320       im[row] = nzbd;
321       ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr);
322       nzi     += nlnk;
323       im[row] += nzbd;  /* update im[row]: num of cols with index<=i */
324 
325       row = lnk[row];
326     }
327 
328     bi[i+1] = bi[i] + nzi;
329     im[i]   = nzi;
330 
331     /* mark bdiag */
332     nzbd = 0;
333     nnz  = nzi;
334     k    = lnk[n];
335     while (nnz-- && k < i){
336       nzbd++;
337       k = lnk[k];
338     }
339     bdiag[i] = bi[i] + nzbd;
340 
341     /* if free space is not available, make more free space */
342     if (current_space->local_remaining<nzi) {
343       nnz = (n - i)*nzi; /* estimated and max additional space needed */
344       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
345       reallocs++;
346     }
347 
348     /* copy data into free space, then initialize lnk */
349     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
350     bi_ptr[i] = current_space->array;
351     current_space->array           += nzi;
352     current_space->local_used      += nzi;
353     current_space->local_remaining -= nzi;
354   }
355 #if defined(PETSC_USE_DEBUG)
356   if (ai[n] != 0) {
357     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
358     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
359     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));CHKERRQ(ierr);
360     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr);
361     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
362   } else {
363     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr);
364   }
365 #endif
366 
367   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
368   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
369 
370   /* destroy list of free space and other temporary array(s) */
371   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
372   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
373   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
374   ierr = PetscFree(cols);CHKERRQ(ierr);
375 
376   /* put together the new matrix */
377   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
378   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
379   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
380   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
381   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
382   b    = (Mat_SeqAIJ*)(*B)->data;
383   b->freedata     = PETSC_TRUE;
384   b->singlemalloc = PETSC_FALSE;
385   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
386   b->j          = bj;
387   b->i          = bi;
388   b->diag       = bdiag;
389   b->ilen       = 0;
390   b->imax       = 0;
391   b->row        = isrow;
392   b->col        = iscol;
393   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
394   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
395   b->icol       = isicol;
396   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
397 
398   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
399   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
400   b->maxnz = b->nz = bi[n] ;
401 
402   (*B)->factor                 =  FACTOR_LU;
403   (*B)->info.factor_mallocs    = reallocs;
404   (*B)->info.fill_ratio_given  = f;
405 
406   if (ai[n] != 0) {
407     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
408   } else {
409     (*B)->info.fill_ratio_needed = 0.0;
410   }
411   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
412   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
413   PetscFunctionReturn(0);
414 }
415 
416 /* ----------------------------------------------------------- */
417 #undef __FUNCT__
418 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
419 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
420 {
421   Mat            C=*B;
422   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
423   IS             isrow = b->row,isicol = b->icol;
424   PetscErrorCode ierr;
425   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
426   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
427   PetscInt       *diag_offset = b->diag,diag,*pj;
428   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
429   PetscScalar    d;
430   PetscReal      rs;
431   LUShift_Ctx    sctx;
432   PetscInt       newshift;
433 
434   PetscFunctionBegin;
435   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
436   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
437   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
438   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
439   rtmps = rtmp; ics = ic;
440 
441   if (!a->diag) {
442     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
443   }
444   /* if both shift schemes are chosen by user, only use info->shiftpd */
445   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
446   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
447     PetscInt *aai = a->i,*ddiag = a->diag;
448     sctx.shift_top = 0;
449     for (i=0; i<n; i++) {
450       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
451       d  = (a->a)[ddiag[i]];
452       rs = -PetscAbsScalar(d) - PetscRealPart(d);
453       v  = a->a+aai[i];
454       nz = aai[i+1] - aai[i];
455       for (j=0; j<nz; j++)
456 	rs += PetscAbsScalar(v[j]);
457       if (rs>sctx.shift_top) sctx.shift_top = rs;
458     }
459     if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12;
460     sctx.shift_top    *= 1.1;
461     sctx.nshift_max   = 5;
462     sctx.shift_lo     = 0.;
463     sctx.shift_hi     = 1.;
464   }
465 
466   sctx.shift_amount = 0;
467   sctx.nshift       = 0;
468   do {
469     sctx.lushift = PETSC_FALSE;
470     for (i=0; i<n; i++){
471       nz    = bi[i+1] - bi[i];
472       bjtmp = bj + bi[i];
473       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
474 
475       /* load in initial (unfactored row) */
476       nz    = a->i[r[i]+1] - a->i[r[i]];
477       ajtmp = a->j + a->i[r[i]];
478       v     = a->a + a->i[r[i]];
479       for (j=0; j<nz; j++) {
480         rtmp[ics[ajtmp[j]]] = v[j];
481       }
482       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
483 
484       row = *bjtmp++;
485       while  (row < i) {
486         pc = rtmp + row;
487         if (*pc != 0.0) {
488           pv         = b->a + diag_offset[row];
489           pj         = b->j + diag_offset[row] + 1;
490           multiplier = *pc / *pv++;
491           *pc        = multiplier;
492           nz         = bi[row+1] - diag_offset[row] - 1;
493           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
494           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
495         }
496         row = *bjtmp++;
497       }
498       /* finished row so stick it into b->a */
499       pv   = b->a + bi[i] ;
500       pj   = b->j + bi[i] ;
501       nz   = bi[i+1] - bi[i];
502       diag = diag_offset[i] - bi[i];
503       rs   = 0.0;
504       for (j=0; j<nz; j++) {
505         pv[j] = rtmps[pj[j]];
506         if (j != diag) rs += PetscAbsScalar(pv[j]);
507       }
508 
509       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
510       sctx.rs  = rs;
511       sctx.pv  = pv[diag];
512       ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
513       if (newshift == 1){
514         break;    /* sctx.shift_amount is updated */
515       } else if (newshift == -1){
516         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
517       }
518     }
519 
520     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
521       /*
522        * if no shift in this attempt & shifting & started shifting & can refine,
523        * then try lower shift
524        */
525       sctx.shift_hi        = info->shift_fraction;
526       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
527       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
528       sctx.lushift         = PETSC_TRUE;
529       sctx.nshift++;
530     }
531   } while (sctx.lushift);
532 
533   /* invert diagonal entries for simplier triangular solves */
534   for (i=0; i<n; i++) {
535     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
536   }
537 
538   ierr = PetscFree(rtmp);CHKERRQ(ierr);
539   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
540   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
541   C->factor = FACTOR_LU;
542   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
543   C->assembled = PETSC_TRUE;
544   ierr = PetscLogFlops(C->n);CHKERRQ(ierr);
545   if (sctx.nshift){
546     if (info->shiftnz) {
547       ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
548     } else if (info->shiftpd) {
549       ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr);
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
557 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
558 {
559   PetscFunctionBegin;
560   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
561   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
562   PetscFunctionReturn(0);
563 }
564 
565 
566 /* ----------------------------------------------------------- */
567 #undef __FUNCT__
568 #define __FUNCT__ "MatLUFactor_SeqAIJ"
569 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
570 {
571   PetscErrorCode ierr;
572   Mat            C;
573 
574   PetscFunctionBegin;
575   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
576   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
577   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
578   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 /* ----------------------------------------------------------- */
582 #undef __FUNCT__
583 #define __FUNCT__ "MatSolve_SeqAIJ"
584 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
585 {
586   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
587   IS             iscol = a->col,isrow = a->row;
588   PetscErrorCode ierr;
589   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
590   PetscInt       nz,*rout,*cout;
591   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
592 
593   PetscFunctionBegin;
594   if (!n) PetscFunctionReturn(0);
595 
596   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
597   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
598   tmp  = a->solve_work;
599 
600   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
601   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
602 
603   /* forward solve the lower triangular */
604   tmp[0] = b[*r++];
605   tmps   = tmp;
606   for (i=1; i<n; i++) {
607     v   = aa + ai[i] ;
608     vi  = aj + ai[i] ;
609     nz  = a->diag[i] - ai[i];
610     sum = b[*r++];
611     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
612     tmp[i] = sum;
613   }
614 
615   /* backward solve the upper triangular */
616   for (i=n-1; i>=0; i--){
617     v   = aa + a->diag[i] + 1;
618     vi  = aj + a->diag[i] + 1;
619     nz  = ai[i+1] - a->diag[i] - 1;
620     sum = tmp[i];
621     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
622     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
623   }
624 
625   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
626   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
627   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
628   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
629   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
630   PetscFunctionReturn(0);
631 }
632 
633 /* ----------------------------------------------------------- */
634 #undef __FUNCT__
635 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
636 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
637 {
638   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
639   PetscErrorCode ierr;
640   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
641   PetscScalar    *x,*b,*aa = a->a;
642 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
643   PetscInt       adiag_i,i,*vi,nz,ai_i;
644   PetscScalar    *v,sum;
645 #endif
646 
647   PetscFunctionBegin;
648   if (!n) PetscFunctionReturn(0);
649 
650   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
651   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
652 
653 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
654   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
655 #else
656   /* forward solve the lower triangular */
657   x[0] = b[0];
658   for (i=1; i<n; i++) {
659     ai_i = ai[i];
660     v    = aa + ai_i;
661     vi   = aj + ai_i;
662     nz   = adiag[i] - ai_i;
663     sum  = b[i];
664     while (nz--) sum -= *v++ * x[*vi++];
665     x[i] = sum;
666   }
667 
668   /* backward solve the upper triangular */
669   for (i=n-1; i>=0; i--){
670     adiag_i = adiag[i];
671     v       = aa + adiag_i + 1;
672     vi      = aj + adiag_i + 1;
673     nz      = ai[i+1] - adiag_i - 1;
674     sum     = x[i];
675     while (nz--) sum -= *v++ * x[*vi++];
676     x[i]    = sum*aa[adiag_i];
677   }
678 #endif
679   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
680   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
681   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 #undef __FUNCT__
686 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
687 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
688 {
689   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
690   IS             iscol = a->col,isrow = a->row;
691   PetscErrorCode ierr;
692   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
693   PetscInt       nz,*rout,*cout;
694   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
695 
696   PetscFunctionBegin;
697   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
698 
699   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
700   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
701   tmp  = a->solve_work;
702 
703   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
704   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
705 
706   /* forward solve the lower triangular */
707   tmp[0] = b[*r++];
708   for (i=1; i<n; i++) {
709     v   = aa + ai[i] ;
710     vi  = aj + ai[i] ;
711     nz  = a->diag[i] - ai[i];
712     sum = b[*r++];
713     while (nz--) sum -= *v++ * tmp[*vi++ ];
714     tmp[i] = sum;
715   }
716 
717   /* backward solve the upper triangular */
718   for (i=n-1; i>=0; i--){
719     v   = aa + a->diag[i] + 1;
720     vi  = aj + a->diag[i] + 1;
721     nz  = ai[i+1] - a->diag[i] - 1;
722     sum = tmp[i];
723     while (nz--) sum -= *v++ * tmp[*vi++ ];
724     tmp[i] = sum*aa[a->diag[i]];
725     x[*c--] += tmp[i];
726   }
727 
728   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
729   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
730   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
731   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
732   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
733 
734   PetscFunctionReturn(0);
735 }
736 /* -------------------------------------------------------------------*/
737 #undef __FUNCT__
738 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
739 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
740 {
741   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
742   IS             iscol = a->col,isrow = a->row;
743   PetscErrorCode ierr;
744   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
745   PetscInt       nz,*rout,*cout,*diag = a->diag;
746   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
747 
748   PetscFunctionBegin;
749   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
750   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
751   tmp  = a->solve_work;
752 
753   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
754   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
755 
756   /* copy the b into temp work space according to permutation */
757   for (i=0; i<n; i++) tmp[i] = b[c[i]];
758 
759   /* forward solve the U^T */
760   for (i=0; i<n; i++) {
761     v   = aa + diag[i] ;
762     vi  = aj + diag[i] + 1;
763     nz  = ai[i+1] - diag[i] - 1;
764     s1  = tmp[i];
765     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
766     while (nz--) {
767       tmp[*vi++ ] -= (*v++)*s1;
768     }
769     tmp[i] = s1;
770   }
771 
772   /* backward solve the L^T */
773   for (i=n-1; i>=0; i--){
774     v   = aa + diag[i] - 1 ;
775     vi  = aj + diag[i] - 1 ;
776     nz  = diag[i] - ai[i];
777     s1  = tmp[i];
778     while (nz--) {
779       tmp[*vi-- ] -= (*v--)*s1;
780     }
781   }
782 
783   /* copy tmp into x according to permutation */
784   for (i=0; i<n; i++) x[r[i]] = tmp[i];
785 
786   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
787   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
788   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
789   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
790 
791   ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr);
792   PetscFunctionReturn(0);
793 }
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
797 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
798 {
799   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
800   IS             iscol = a->col,isrow = a->row;
801   PetscErrorCode ierr;
802   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
803   PetscInt       nz,*rout,*cout,*diag = a->diag;
804   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
805 
806   PetscFunctionBegin;
807   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
808 
809   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
810   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
811   tmp = a->solve_work;
812 
813   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
814   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
815 
816   /* copy the b into temp work space according to permutation */
817   for (i=0; i<n; i++) tmp[i] = b[c[i]];
818 
819   /* forward solve the U^T */
820   for (i=0; i<n; i++) {
821     v   = aa + diag[i] ;
822     vi  = aj + diag[i] + 1;
823     nz  = ai[i+1] - diag[i] - 1;
824     tmp[i] *= *v++;
825     while (nz--) {
826       tmp[*vi++ ] -= (*v++)*tmp[i];
827     }
828   }
829 
830   /* backward solve the L^T */
831   for (i=n-1; i>=0; i--){
832     v   = aa + diag[i] - 1 ;
833     vi  = aj + diag[i] - 1 ;
834     nz  = diag[i] - ai[i];
835     while (nz--) {
836       tmp[*vi-- ] -= (*v--)*tmp[i];
837     }
838   }
839 
840   /* copy tmp into x according to permutation */
841   for (i=0; i<n; i++) x[r[i]] += tmp[i];
842 
843   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
844   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
845   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
846   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
847 
848   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
849   PetscFunctionReturn(0);
850 }
851 /* ----------------------------------------------------------------*/
852 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
853 
854 #undef __FUNCT__
855 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
856 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
857 {
858   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
859   IS             isicol;
860   PetscErrorCode ierr;
861   PetscInt       *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
862   PetscInt       *bi,*cols,nnz,*cols_lvl;
863   PetscInt       *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
864   PetscInt       i,levels,diagonal_fill;
865   PetscTruth     col_identity,row_identity;
866   PetscReal      f;
867   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
868   PetscBT        lnkbt;
869   PetscInt       nzi,*bj,**bj_ptr,**bjlvl_ptr;
870   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
871   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
872 
873   PetscFunctionBegin;
874   f             = info->fill;
875   levels        = (PetscInt)info->levels;
876   diagonal_fill = (PetscInt)info->diagonal_fill;
877   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
878 
879   /* special case that simply copies fill pattern */
880   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
881   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
882   if (!levels && row_identity && col_identity) {
883     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
884     (*fact)->factor = FACTOR_LU;
885     b               = (Mat_SeqAIJ*)(*fact)->data;
886     if (!b->diag) {
887       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
888     }
889     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
890     b->row              = isrow;
891     b->col              = iscol;
892     b->icol             = isicol;
893     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
894     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
895     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
896     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
897     PetscFunctionReturn(0);
898   }
899 
900   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
901   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
902 
903   /* get new row pointers */
904   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
905   bi[0] = 0;
906   /* bdiag is location of diagonal in factor */
907   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
908   bdiag[0]  = 0;
909 
910   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
911   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
912 
913   /* create a linked list for storing column indices of the active row */
914   nlnk = n + 1;
915   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
916 
917   /* initial FreeSpace size is f*(ai[n]+1) */
918   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
919   current_space = free_space;
920   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
921   current_space_lvl = free_space_lvl;
922 
923   for (i=0; i<n; i++) {
924     nzi = 0;
925     /* copy current row into linked list */
926     nnz  = ai[r[i]+1] - ai[r[i]];
927     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
928     cols = aj + ai[r[i]];
929     lnk[i] = -1; /* marker to indicate if diagonal exists */
930     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
931     nzi += nlnk;
932 
933     /* make sure diagonal entry is included */
934     if (diagonal_fill && lnk[i] == -1) {
935       fm = n;
936       while (lnk[fm] < i) fm = lnk[fm];
937       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
938       lnk[fm]    = i;
939       lnk_lvl[i] = 0;
940       nzi++; dcount++;
941     }
942 
943     /* add pivot rows into the active row */
944     nzbd = 0;
945     prow = lnk[n];
946     while (prow < i) {
947       nnz      = bdiag[prow];
948       cols     = bj_ptr[prow] + nnz + 1;
949       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
950       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
951       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
952       nzi += nlnk;
953       prow = lnk[prow];
954       nzbd++;
955     }
956     bdiag[i] = nzbd;
957     bi[i+1]  = bi[i] + nzi;
958 
959     /* if free space is not available, make more free space */
960     if (current_space->local_remaining<nzi) {
961       nnz = nzi*(n - i); /* estimated and max additional space needed */
962       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
963       ierr = GetMoreSpace(nnz,&current_space_lvl);CHKERRQ(ierr);
964       reallocs++;
965     }
966 
967     /* copy data into free_space and free_space_lvl, then initialize lnk */
968     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
969     bj_ptr[i]    = current_space->array;
970     bjlvl_ptr[i] = current_space_lvl->array;
971 
972     /* make sure the active row i has diagonal entry */
973     if (*(bj_ptr[i]+bdiag[i]) != i) {
974       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
975     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
976     }
977 
978     current_space->array           += nzi;
979     current_space->local_used      += nzi;
980     current_space->local_remaining -= nzi;
981     current_space_lvl->array           += nzi;
982     current_space_lvl->local_used      += nzi;
983     current_space_lvl->local_remaining -= nzi;
984   }
985 
986   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
987   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
988 
989   /* destroy list of free space and other temporary arrays */
990   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
991   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
992   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
993   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
994   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
995 
996 #if defined(PETSC_USE_DEBUG)
997   {
998     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
999     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
1000     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr);
1001     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr);
1002     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
1003     if (diagonal_fill) {
1004       ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr);
1005     }
1006   }
1007 #endif
1008 
1009   /* put together the new matrix */
1010   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
1011   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
1012   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1013   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1014   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1015   b = (Mat_SeqAIJ*)(*fact)->data;
1016   b->freedata     = PETSC_TRUE;
1017   b->singlemalloc = PETSC_FALSE;
1018   len = (bi[n] )*sizeof(PetscScalar);
1019   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1020   b->j          = bj;
1021   b->i          = bi;
1022   for (i=0; i<n; i++) bdiag[i] += bi[i];
1023   b->diag       = bdiag;
1024   b->ilen       = 0;
1025   b->imax       = 0;
1026   b->row        = isrow;
1027   b->col        = iscol;
1028   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1029   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1030   b->icol       = isicol;
1031   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1032   /* In b structure:  Free imax, ilen, old a, old j.
1033      Allocate bdiag, solve_work, new a, new j */
1034   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1035   b->maxnz             = b->nz = bi[n] ;
1036   (*fact)->factor = FACTOR_LU;
1037   (*fact)->info.factor_mallocs    = reallocs;
1038   (*fact)->info.fill_ratio_given  = f;
1039   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1040 
1041   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1042   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1043 
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 #include "src/mat/impls/sbaij/seq/sbaij.h"
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1050 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1051 {
1052   Mat            C = *B;
1053   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1054   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1055   IS             ip=b->row;
1056   PetscErrorCode ierr;
1057   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1058   PetscInt       *ai=a->i,*aj=a->j;
1059   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1060   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1061   PetscReal      zeropivot,rs,shiftnz;
1062   PetscReal      shiftpd;
1063   ChShift_Ctx    sctx;
1064   PetscInt       newshift;
1065 
1066   PetscFunctionBegin;
1067   shiftnz   = info->shiftnz;
1068   shiftpd   = info->shiftpd;
1069   zeropivot = info->zeropivot;
1070 
1071   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1072 
1073   /* initialization */
1074   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1075   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1076   jl   = il + mbs;
1077   rtmp = (MatScalar*)(jl + mbs);
1078 
1079   sctx.shift_amount = 0;
1080   sctx.nshift       = 0;
1081   do {
1082     sctx.chshift = PETSC_FALSE;
1083     for (i=0; i<mbs; i++) {
1084       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1085     }
1086 
1087     for (k = 0; k<mbs; k++){
1088       bval = ba + bi[k];
1089       /* initialize k-th row by the perm[k]-th row of A */
1090       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1091       for (j = jmin; j < jmax; j++){
1092         col = rip[aj[j]];
1093         if (col >= k){ /* only take upper triangular entry */
1094           rtmp[col] = aa[j];
1095           *bval++  = 0.0; /* for in-place factorization */
1096         }
1097       }
1098       /* shift the diagonal of the matrix */
1099       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1100 
1101       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1102       dk = rtmp[k];
1103       i = jl[k]; /* first row to be added to k_th row  */
1104 
1105       while (i < k){
1106         nexti = jl[i]; /* next row to be added to k_th row */
1107 
1108         /* compute multiplier, update diag(k) and U(i,k) */
1109         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1110         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1111         dk += uikdi*ba[ili];
1112         ba[ili] = uikdi; /* -U(i,k) */
1113 
1114         /* add multiple of row i to k-th row */
1115         jmin = ili + 1; jmax = bi[i+1];
1116         if (jmin < jmax){
1117           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1118           /* update il and jl for row i */
1119           il[i] = jmin;
1120           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1121         }
1122         i = nexti;
1123       }
1124 
1125       /* shift the diagonals when zero pivot is detected */
1126       /* compute rs=sum of abs(off-diagonal) */
1127       rs   = 0.0;
1128       jmin = bi[k]+1;
1129       nz   = bi[k+1] - jmin;
1130       if (nz){
1131         bcol = bj + jmin;
1132         while (nz--){
1133           rs += PetscAbsScalar(rtmp[*bcol]);
1134           bcol++;
1135         }
1136       }
1137 
1138       sctx.rs = rs;
1139       sctx.pv = dk;
1140       ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
1141       if (newshift == 1){
1142         break;    /* sctx.shift_amount is updated */
1143       } else if (newshift == -1){
1144         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1145       }
1146 
1147       /* copy data into U(k,:) */
1148       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1149       jmin = bi[k]+1; jmax = bi[k+1];
1150       if (jmin < jmax) {
1151         for (j=jmin; j<jmax; j++){
1152           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1153         }
1154         /* add the k-th row into il and jl */
1155         il[k] = jmin;
1156         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1157       }
1158     }
1159   } while (sctx.chshift);
1160   ierr = PetscFree(il);CHKERRQ(ierr);
1161 
1162   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1163   C->factor       = FACTOR_CHOLESKY;
1164   C->assembled    = PETSC_TRUE;
1165   C->preallocated = PETSC_TRUE;
1166   ierr = PetscLogFlops(C->m);CHKERRQ(ierr);
1167   if (sctx.nshift){
1168     if (shiftnz) {
1169       ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
1170     } else if (shiftpd) {
1171       ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
1172     }
1173   }
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1179 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1180 {
1181   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1182   Mat_SeqSBAIJ   *b;
1183   Mat            B;
1184   PetscErrorCode ierr;
1185   PetscTruth     perm_identity;
1186   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1187   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1188   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1189   PetscInt       ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1190   PetscReal      fill=info->fill,levels=info->levels;
1191   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1192   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1193   PetscBT        lnkbt;
1194 
1195   PetscFunctionBegin;
1196   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1197   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1198 
1199   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1200   ui[0] = 0;
1201 
1202   /* special case that simply copies fill pattern */
1203   if (!levels && perm_identity) {
1204     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1205     for (i=0; i<am; i++) {
1206       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1207     }
1208     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1209     cols = uj;
1210     for (i=0; i<am; i++) {
1211       aj    = a->j + a->diag[i];
1212       ncols = ui[i+1] - ui[i];
1213       for (j=0; j<ncols; j++) *cols++ = *aj++;
1214     }
1215   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1216     /* initialization */
1217     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1218 
1219     /* jl: linked list for storing indices of the pivot rows
1220        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1221     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1222     il         = jl + am;
1223     uj_ptr     = (PetscInt**)(il + am);
1224     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1225     for (i=0; i<am; i++){
1226       jl[i] = am; il[i] = 0;
1227     }
1228 
1229     /* create and initialize a linked list for storing column indices of the active row k */
1230     nlnk = am + 1;
1231     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1232 
1233     /* initial FreeSpace size is fill*(ai[am]+1) */
1234     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1235     current_space = free_space;
1236     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1237     current_space_lvl = free_space_lvl;
1238 
1239     for (k=0; k<am; k++){  /* for each active row k */
1240       /* initialize lnk by the column indices of row rip[k] of A */
1241       nzk   = 0;
1242       ncols = ai[rip[k]+1] - ai[rip[k]];
1243       ncols_upper = 0;
1244       for (j=0; j<ncols; j++){
1245         i = *(aj + ai[rip[k]] + j);
1246         if (rip[i] >= k){ /* only take upper triangular entry */
1247           ajtmp[ncols_upper] = i;
1248           ncols_upper++;
1249         }
1250       }
1251       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1252       nzk += nlnk;
1253 
1254       /* update lnk by computing fill-in for each pivot row to be merged in */
1255       prow = jl[k]; /* 1st pivot row */
1256 
1257       while (prow < k){
1258         nextprow = jl[prow];
1259 
1260         /* merge prow into k-th row */
1261         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1262         jmax = ui[prow+1];
1263         ncols = jmax-jmin;
1264         i     = jmin - ui[prow];
1265         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1266         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1267         j     = *(uj - 1);
1268         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1269         nzk += nlnk;
1270 
1271         /* update il and jl for prow */
1272         if (jmin < jmax){
1273           il[prow] = jmin;
1274           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1275         }
1276         prow = nextprow;
1277       }
1278 
1279       /* if free space is not available, make more free space */
1280       if (current_space->local_remaining<nzk) {
1281         i = am - k + 1; /* num of unfactored rows */
1282         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1283         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1284         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1285         reallocs++;
1286       }
1287 
1288       /* copy data into free_space and free_space_lvl, then initialize lnk */
1289       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1290 
1291       /* add the k-th row into il and jl */
1292       if (nzk > 1){
1293         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1294         jl[k] = jl[i]; jl[i] = k;
1295         il[k] = ui[k] + 1;
1296       }
1297       uj_ptr[k]     = current_space->array;
1298       uj_lvl_ptr[k] = current_space_lvl->array;
1299 
1300       current_space->array           += nzk;
1301       current_space->local_used      += nzk;
1302       current_space->local_remaining -= nzk;
1303 
1304       current_space_lvl->array           += nzk;
1305       current_space_lvl->local_used      += nzk;
1306       current_space_lvl->local_remaining -= nzk;
1307 
1308       ui[k+1] = ui[k] + nzk;
1309     }
1310 
1311 #if defined(PETSC_USE_DEBUG)
1312     if (ai[am] != 0) {
1313       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1314       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr);
1315       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr);
1316       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr);
1317     } else {
1318       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1319     }
1320 #endif
1321 
1322     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1323     ierr = PetscFree(jl);CHKERRQ(ierr);
1324     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1325 
1326     /* destroy list of free space and other temporary array(s) */
1327     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1328     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1329     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1330     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1331 
1332   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1333 
1334   /* put together the new matrix in MATSEQSBAIJ format */
1335   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1336   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1337   B = *fact;
1338   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1339   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1340 
1341   b    = (Mat_SeqSBAIJ*)B->data;
1342   b->singlemalloc = PETSC_FALSE;
1343   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1344   b->j    = uj;
1345   b->i    = ui;
1346   b->diag = 0;
1347   b->ilen = 0;
1348   b->imax = 0;
1349   b->row  = perm;
1350   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1351   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1352   b->icol = perm;
1353   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1354   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1355   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1356   b->maxnz = b->nz = ui[am];
1357 
1358   B->factor                 = FACTOR_CHOLESKY;
1359   B->info.factor_mallocs    = reallocs;
1360   B->info.fill_ratio_given  = fill;
1361   if (ai[am] != 0) {
1362     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1363   } else {
1364     B->info.fill_ratio_needed = 0.0;
1365   }
1366   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1367   if (perm_identity){
1368     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1369     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1370   }
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1376 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1377 {
1378   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1379   Mat_SeqSBAIJ   *b;
1380   Mat            B;
1381   PetscErrorCode ierr;
1382   PetscTruth     perm_identity;
1383   PetscReal      fill = info->fill;
1384   PetscInt       *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1385   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1386   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1387   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1388   PetscBT        lnkbt;
1389   IS             iperm;
1390 
1391   PetscFunctionBegin;
1392   /* check whether perm is the identity mapping */
1393   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1394   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1395 
1396   if (!perm_identity){
1397     /* check if perm is symmetric! */
1398     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1399     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1400     for (i=0; i<am; i++) {
1401       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1402     }
1403     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1404     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1405   }
1406 
1407   /* initialization */
1408   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1409   ui[0] = 0;
1410 
1411   /* jl: linked list for storing indices of the pivot rows
1412      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1413   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1414   il     = jl + am;
1415   cols   = il + am;
1416   ui_ptr = (PetscInt**)(cols + am);
1417   for (i=0; i<am; i++){
1418     jl[i] = am; il[i] = 0;
1419   }
1420 
1421   /* create and initialize a linked list for storing column indices of the active row k */
1422   nlnk = am + 1;
1423   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1424 
1425   /* initial FreeSpace size is fill*(ai[am]+1) */
1426   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1427   current_space = free_space;
1428 
1429   for (k=0; k<am; k++){  /* for each active row k */
1430     /* initialize lnk by the column indices of row rip[k] of A */
1431     nzk   = 0;
1432     ncols = ai[rip[k]+1] - ai[rip[k]];
1433     ncols_upper = 0;
1434     for (j=0; j<ncols; j++){
1435       i = rip[*(aj + ai[rip[k]] + j)];
1436       if (i >= k){ /* only take upper triangular entry */
1437         cols[ncols_upper] = i;
1438         ncols_upper++;
1439       }
1440     }
1441     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1442     nzk += nlnk;
1443 
1444     /* update lnk by computing fill-in for each pivot row to be merged in */
1445     prow = jl[k]; /* 1st pivot row */
1446 
1447     while (prow < k){
1448       nextprow = jl[prow];
1449       /* merge prow into k-th row */
1450       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1451       jmax = ui[prow+1];
1452       ncols = jmax-jmin;
1453       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1454       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1455       nzk += nlnk;
1456 
1457       /* update il and jl for prow */
1458       if (jmin < jmax){
1459         il[prow] = jmin;
1460         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1461       }
1462       prow = nextprow;
1463     }
1464 
1465     /* if free space is not available, make more free space */
1466     if (current_space->local_remaining<nzk) {
1467       i = am - k + 1; /* num of unfactored rows */
1468       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1469       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1470       reallocs++;
1471     }
1472 
1473     /* copy data into free space, then initialize lnk */
1474     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1475 
1476     /* add the k-th row into il and jl */
1477     if (nzk-1 > 0){
1478       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1479       jl[k] = jl[i]; jl[i] = k;
1480       il[k] = ui[k] + 1;
1481     }
1482     ui_ptr[k] = current_space->array;
1483     current_space->array           += nzk;
1484     current_space->local_used      += nzk;
1485     current_space->local_remaining -= nzk;
1486 
1487     ui[k+1] = ui[k] + nzk;
1488   }
1489 
1490 #if defined(PETSC_USE_DEBUG)
1491   if (ai[am] != 0) {
1492     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1493     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr);
1494     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr);
1495     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr);
1496   } else {
1497      ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1498   }
1499 #endif
1500 
1501   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1502   ierr = PetscFree(jl);CHKERRQ(ierr);
1503 
1504   /* destroy list of free space and other temporary array(s) */
1505   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1506   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1507   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1508 
1509   /* put together the new matrix in MATSEQSBAIJ format */
1510   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1511   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1512   B    = *fact;
1513   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1514   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1515 
1516   b = (Mat_SeqSBAIJ*)B->data;
1517   b->singlemalloc = PETSC_FALSE;
1518   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1519   b->j    = uj;
1520   b->i    = ui;
1521   b->diag = 0;
1522   b->ilen = 0;
1523   b->imax = 0;
1524   b->row  = perm;
1525   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1526   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1527   b->icol = perm;
1528   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1529   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1530   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1531   b->maxnz = b->nz = ui[am];
1532 
1533   B->factor                 = FACTOR_CHOLESKY;
1534   B->info.factor_mallocs    = reallocs;
1535   B->info.fill_ratio_given  = fill;
1536   if (ai[am] != 0) {
1537     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1538   } else {
1539     B->info.fill_ratio_needed = 0.0;
1540   }
1541   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1542   if (perm_identity){
1543     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1544     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1545   }
1546   PetscFunctionReturn(0);
1547 }
1548