xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 2a6744eb01855f5aa328eb8fdf4b0d01e72ad151)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
23 
24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
28 #endif
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
32   /* ------------------------------------------------------------
33 
34           This interface was contribed by Tony Caola
35 
36      This routine is an interface to the pivoting drop-tolerance
37      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
38      SPARSEKIT2.
39 
40      The SPARSEKIT2 routines used here are covered by the GNU
41      copyright; see the file gnu in this directory.
42 
43      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
44      help in getting this routine ironed out.
45 
46      The major drawback to this routine is that if info->fill is
47      not large enough it fails rather than allocating more space;
48      this can be fixed by hacking/improving the f2c version of
49      Yousef Saad's code.
50 
51      ------------------------------------------------------------
52 */
53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
54 {
55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
56   PetscFunctionBegin;
57   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
58   You can obtain the drop tolerance routines by installing PETSc from\n\
59   www.mcs.anl.gov/petsc\n");
60 #else
61   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
62   IS             iscolf,isicol,isirow;
63   PetscTruth     reorder;
64   PetscErrorCode ierr,sierr;
65   PetscInt       *c,*r,*ic,i,n = A->rmap.n;
66   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
67   PetscInt       *ordcol,*iwk,*iperm,*jw;
68   PetscInt       jmax,lfill,job,*o_i,*o_j;
69   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
70   PetscReal      af;
71 
72   PetscFunctionBegin;
73 
74   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
75   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
76   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
77   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
78   lfill   = (PetscInt)(info->dtcount/2.0);
79   jmax    = (PetscInt)(info->fill*a->nz);
80 
81 
82   /* ------------------------------------------------------------
83      If reorder=.TRUE., then the original matrix has to be
84      reordered to reflect the user selected ordering scheme, and
85      then de-reordered so it is in it's original format.
86      Because Saad's dperm() is NOT in place, we have to copy
87      the original matrix and allocate more storage. . .
88      ------------------------------------------------------------
89   */
90 
91   /* set reorder to true if either isrow or iscol is not identity */
92   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
93   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
94   reorder = PetscNot(reorder);
95 
96 
97   /* storage for ilu factor */
98   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
100   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
101   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
102 
103   /* ------------------------------------------------------------
104      Make sure that everything is Fortran formatted (1-Based)
105      ------------------------------------------------------------
106   */
107   for (i=old_i[0];i<old_i[n];i++) {
108     old_j[i]++;
109   }
110   for(i=0;i<n+1;i++) {
111     old_i[i]++;
112   };
113 
114 
115   if (reorder) {
116     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
117     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       r[i]  = r[i]+1;
120       c[i]  = c[i]+1;
121     }
122     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
123     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
124     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
125     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
126     for (i=0;i<n;i++) {
127       r[i]  = r[i]-1;
128       c[i]  = c[i]-1;
129     }
130     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
131     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
132     o_a = old_a2;
133     o_j = old_j2;
134     o_i = old_i2;
135   } else {
136     o_a = old_a;
137     o_j = old_j;
138     o_i = old_i;
139   }
140 
141   /* ------------------------------------------------------------
142      Call Saad's ilutp() routine to generate the factorization
143      ------------------------------------------------------------
144   */
145 
146   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
147   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
148   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
149 
150   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
151   if (sierr) {
152     switch (sierr) {
153       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
154       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
155       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
156       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
157       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
158       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
159     }
160   }
161 
162   ierr = PetscFree(w);CHKERRQ(ierr);
163   ierr = PetscFree(jw);CHKERRQ(ierr);
164 
165   /* ------------------------------------------------------------
166      Saad's routine gives the result in Modified Sparse Row (msr)
167      Convert to Compressed Sparse Row format (csr)
168      ------------------------------------------------------------
169   */
170 
171   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
172   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
173 
174   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
175 
176   ierr = PetscFree(iwk);CHKERRQ(ierr);
177   ierr = PetscFree(wk);CHKERRQ(ierr);
178 
179   if (reorder) {
180     ierr = PetscFree(old_a2);CHKERRQ(ierr);
181     ierr = PetscFree(old_j2);CHKERRQ(ierr);
182     ierr = PetscFree(old_i2);CHKERRQ(ierr);
183   } else {
184     /* fix permutation of old_j that the factorization introduced */
185     for (i=old_i[0]; i<old_i[n]; i++) {
186       old_j[i-1] = iperm[old_j[i-1]-1];
187     }
188   }
189 
190   /* get rid of the shift to indices starting at 1 */
191   for (i=0; i<n+1; i++) {
192     old_i[i]--;
193   }
194   for (i=old_i[0];i<old_i[n];i++) {
195     old_j[i]--;
196   }
197 
198   /* Make the factored matrix 0-based */
199   for (i=0; i<n+1; i++) {
200     new_i[i]--;
201   }
202   for (i=new_i[0];i<new_i[n];i++) {
203     new_j[i]--;
204   }
205 
206   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
207   /*-- permute the right-hand-side and solution vectors           --*/
208   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
209   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
210   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
211   for(i=0; i<n; i++) {
212     ordcol[i] = ic[iperm[i]-1];
213   };
214   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
215   ierr = ISDestroy(isicol);CHKERRQ(ierr);
216 
217   ierr = PetscFree(iperm);CHKERRQ(ierr);
218 
219   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
220   ierr = PetscFree(ordcol);CHKERRQ(ierr);
221 
222   /*----- put together the new matrix -----*/
223 
224   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
225   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
226   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
227   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
228   (*fact)->factor    = FACTOR_LU;
229   (*fact)->assembled = PETSC_TRUE;
230 
231   b = (Mat_SeqAIJ*)(*fact)->data;
232   b->free_a        = PETSC_TRUE;
233   b->free_ij       = PETSC_TRUE;
234   b->sorted        = PETSC_FALSE;
235   b->singlemalloc  = PETSC_FALSE;
236   b->a             = new_a;
237   b->j             = new_j;
238   b->i             = new_i;
239   b->ilen          = 0;
240   b->imax          = 0;
241   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
242   b->row           = isirow;
243   b->col           = iscolf;
244   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
245   b->maxnz = b->nz = new_i[n];
246   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
247   (*fact)->info.factor_mallocs = 0;
248 
249   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr);
253   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
254   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
255   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
256 
257   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
258 
259   PetscFunctionReturn(0);
260 #endif
261 }
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
265 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
266 {
267   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
268   IS                 isicol;
269   PetscErrorCode     ierr;
270   PetscInt           *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
271   PetscInt           *bi,*bj,*ajtmp;
272   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
273   PetscReal          f;
274   PetscInt           nlnk,*lnk,k,**bi_ptr;
275   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
276   PetscBT            lnkbt;
277 
278   PetscFunctionBegin;
279   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
280   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
281   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
282   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
283 
284   /* get new row pointers */
285   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
286   bi[0] = 0;
287 
288   /* bdiag is location of diagonal in factor */
289   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
290   bdiag[0] = 0;
291 
292   /* linked list for storing column indices of the active row */
293   nlnk = n + 1;
294   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
295 
296   ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr);
297   bi_ptr = (PetscInt**)(im + n);
298 
299   /* initial FreeSpace size is f*(ai[n]+1) */
300   f = info->fill;
301   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
302   current_space = free_space;
303 
304   for (i=0; i<n; i++) {
305     /* copy previous fill into linked list */
306     nzi = 0;
307     nnz = ai[r[i]+1] - ai[r[i]];
308     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
309     ajtmp = aj + ai[r[i]];
310     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
311     nzi += nlnk;
312 
313     /* add pivot rows into linked list */
314     row = lnk[n];
315     while (row < i) {
316       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
317       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
318       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
319       nzi += nlnk;
320       row  = lnk[row];
321     }
322     bi[i+1] = bi[i] + nzi;
323     im[i]   = nzi;
324 
325     /* mark bdiag */
326     nzbd = 0;
327     nnz  = nzi;
328     k    = lnk[n];
329     while (nnz-- && k < i){
330       nzbd++;
331       k = lnk[k];
332     }
333     bdiag[i] = bi[i] + nzbd;
334 
335     /* if free space is not available, make more free space */
336     if (current_space->local_remaining<nzi) {
337       nnz = (n - i)*nzi; /* estimated and max additional space needed */
338       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
339       reallocs++;
340     }
341 
342     /* copy data into free space, then initialize lnk */
343     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
344     bi_ptr[i] = current_space->array;
345     current_space->array           += nzi;
346     current_space->local_used      += nzi;
347     current_space->local_remaining -= nzi;
348   }
349 #if defined(PETSC_USE_INFO)
350   if (ai[n] != 0) {
351     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
352     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
353     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
354     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
355     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
356   } else {
357     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
358   }
359 #endif
360 
361   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
362   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
363 
364   /* destroy list of free space and other temporary array(s) */
365   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
366   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
367   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
368   ierr = PetscFree(im);CHKERRQ(ierr);
369 
370   /* put together the new matrix */
371   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
372   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
373   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
374   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
375   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
376   b    = (Mat_SeqAIJ*)(*B)->data;
377   b->free_a       = PETSC_TRUE;
378   b->free_ij      = PETSC_TRUE;
379   b->singlemalloc = PETSC_FALSE;
380   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
381   b->j          = bj;
382   b->i          = bi;
383   b->diag       = bdiag;
384   b->ilen       = 0;
385   b->imax       = 0;
386   b->row        = isrow;
387   b->col        = iscol;
388   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
389   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
390   b->icol       = isicol;
391   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
392 
393   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
394   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
395   b->maxnz = b->nz = bi[n] ;
396 
397   (*B)->factor                 =  FACTOR_LU;
398   (*B)->info.factor_mallocs    = reallocs;
399   (*B)->info.fill_ratio_given  = f;
400 
401   if (ai[n] != 0) {
402     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
403   } else {
404     (*B)->info.fill_ratio_needed = 0.0;
405   }
406   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
407   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
408   PetscFunctionReturn(0);
409 }
410 
411 /*
412     Trouble in factorization, should we dump the original matrix?
413 */
414 #undef __FUNCT__
415 #define __FUNCT__ "MatFactorDumpMatrix"
416 PetscErrorCode MatFactorDumpMatrix(Mat A)
417 {
418   PetscErrorCode ierr;
419   PetscTruth     flg;
420 
421   PetscFunctionBegin;
422   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr);
423   if (flg) {
424     PetscViewer viewer;
425     char        filename[PETSC_MAX_PATH_LEN];
426 
427     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
428     ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
429     ierr = MatView(A,viewer);CHKERRQ(ierr);
430     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
431   }
432   PetscFunctionReturn(0);
433 }
434 
435 /* ----------------------------------------------------------- */
436 #undef __FUNCT__
437 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
438 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
439 {
440   Mat            C=*B;
441   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
442   IS             isrow = b->row,isicol = b->icol;
443   PetscErrorCode ierr;
444   PetscInt       *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
445   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
446   PetscInt       *diag_offset = b->diag,diag,*pj;
447   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
448   PetscScalar    d;
449   PetscReal      rs;
450   LUShift_Ctx    sctx;
451   PetscInt       newshift,*ddiag;
452 
453   PetscFunctionBegin;
454   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
455   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
456   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
457   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
458   rtmps = rtmp; ics = ic;
459 
460   sctx.shift_top  = 0;
461   sctx.nshift_max = 0;
462   sctx.shift_lo   = 0;
463   sctx.shift_hi   = 0;
464 
465   if (!a->diag) {
466     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
467   }
468   /* if both shift schemes are chosen by user, only use info->shiftpd */
469   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
470   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
471     PetscInt *aai = a->i;
472     ddiag          = a->diag;
473     sctx.shift_top = 0;
474     for (i=0; i<n; i++) {
475       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
476       d  = (a->a)[ddiag[i]];
477       rs = -PetscAbsScalar(d) - PetscRealPart(d);
478       v  = a->a+aai[i];
479       nz = aai[i+1] - aai[i];
480       for (j=0; j<nz; j++)
481 	rs += PetscAbsScalar(v[j]);
482       if (rs>sctx.shift_top) sctx.shift_top = rs;
483     }
484     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
485     sctx.shift_top    *= 1.1;
486     sctx.nshift_max   = 5;
487     sctx.shift_lo     = 0.;
488     sctx.shift_hi     = 1.;
489   }
490 
491   sctx.shift_amount = 0;
492   sctx.nshift       = 0;
493   do {
494     sctx.lushift = PETSC_FALSE;
495     for (i=0; i<n; i++){
496       nz    = bi[i+1] - bi[i];
497       bjtmp = bj + bi[i];
498       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
499 
500       /* load in initial (unfactored row) */
501       nz    = a->i[r[i]+1] - a->i[r[i]];
502       ajtmp = a->j + a->i[r[i]];
503       v     = a->a + a->i[r[i]];
504       for (j=0; j<nz; j++) {
505         rtmp[ics[ajtmp[j]]] = v[j];
506       }
507       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
508 
509       row = *bjtmp++;
510       while  (row < i) {
511         pc = rtmp + row;
512         if (*pc != 0.0) {
513           pv         = b->a + diag_offset[row];
514           pj         = b->j + diag_offset[row] + 1;
515           multiplier = *pc / *pv++;
516           *pc        = multiplier;
517           nz         = bi[row+1] - diag_offset[row] - 1;
518           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
519           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
520         }
521         row = *bjtmp++;
522       }
523       /* finished row so stick it into b->a */
524       pv   = b->a + bi[i] ;
525       pj   = b->j + bi[i] ;
526       nz   = bi[i+1] - bi[i];
527       diag = diag_offset[i] - bi[i];
528       rs   = 0.0;
529       for (j=0; j<nz; j++) {
530         pv[j] = rtmps[pj[j]];
531         if (j != diag) rs += PetscAbsScalar(pv[j]);
532       }
533 
534       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
535       sctx.rs  = rs;
536       sctx.pv  = pv[diag];
537       ierr = MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);CHKERRQ(ierr);
538       if (newshift == 1) break;
539     }
540 
541     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
542       /*
543        * if no shift in this attempt & shifting & started shifting & can refine,
544        * then try lower shift
545        */
546       sctx.shift_hi        = info->shift_fraction;
547       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
548       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
549       sctx.lushift         = PETSC_TRUE;
550       sctx.nshift++;
551     }
552   } while (sctx.lushift);
553 
554   /* invert diagonal entries for simplier triangular solves */
555   for (i=0; i<n; i++) {
556     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
557   }
558 
559   ierr = PetscFree(rtmp);CHKERRQ(ierr);
560   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
561   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
562   C->factor = FACTOR_LU;
563   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
564   C->assembled = PETSC_TRUE;
565   ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr);
566   if (sctx.nshift){
567     if (info->shiftnz) {
568       ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
569     } else if (info->shiftpd) {
570       ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
571     }
572   }
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
578 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
579 {
580   PetscFunctionBegin;
581   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
582   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
583   PetscFunctionReturn(0);
584 }
585 
586 
587 /* ----------------------------------------------------------- */
588 #undef __FUNCT__
589 #define __FUNCT__ "MatLUFactor_SeqAIJ"
590 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
591 {
592   PetscErrorCode ierr;
593   Mat            C;
594 
595   PetscFunctionBegin;
596   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
597   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
598   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
599   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
600   PetscFunctionReturn(0);
601 }
602 /* ----------------------------------------------------------- */
603 #undef __FUNCT__
604 #define __FUNCT__ "MatSolve_SeqAIJ"
605 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
606 {
607   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
608   IS             iscol = a->col,isrow = a->row;
609   PetscErrorCode ierr;
610   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
611   PetscInt       nz,*rout,*cout;
612   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
613 
614   PetscFunctionBegin;
615   if (!n) PetscFunctionReturn(0);
616 
617   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
618   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
619   tmp  = a->solve_work;
620 
621   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
622   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
623 
624   /* forward solve the lower triangular */
625   tmp[0] = b[*r++];
626   tmps   = tmp;
627   for (i=1; i<n; i++) {
628     v   = aa + ai[i] ;
629     vi  = aj + ai[i] ;
630     nz  = a->diag[i] - ai[i];
631     sum = b[*r++];
632     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
633     tmp[i] = sum;
634   }
635 
636   /* backward solve the upper triangular */
637   for (i=n-1; i>=0; i--){
638     v   = aa + a->diag[i] + 1;
639     vi  = aj + a->diag[i] + 1;
640     nz  = ai[i+1] - a->diag[i] - 1;
641     sum = tmp[i];
642     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
643     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
644   }
645 
646   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
647   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
648   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
649   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
650   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 
654 #undef __FUNCT__
655 #define __FUNCT__ "MatMatSolve_SeqAIJ"
656 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
657 {
658   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
659   IS             iscol = a->col,isrow = a->row;
660   PetscErrorCode ierr;
661   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
662   PetscInt       nz,*rout,*cout,neq;
663   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
664 
665   PetscFunctionBegin;
666   if (!n) PetscFunctionReturn(0);
667 
668   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
669   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
670 
671   tmp  = a->solve_work;
672   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
673   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
674 
675   for (neq=0; neq<n; neq++){
676     /* forward solve the lower triangular */
677     tmp[0] = b[r[0]];
678     tmps   = tmp;
679     for (i=1; i<n; i++) {
680       v   = aa + ai[i] ;
681       vi  = aj + ai[i] ;
682       nz  = a->diag[i] - ai[i];
683       sum = b[r[i]];
684       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
685       tmp[i] = sum;
686     }
687     /* backward solve the upper triangular */
688     for (i=n-1; i>=0; i--){
689       v   = aa + a->diag[i] + 1;
690       vi  = aj + a->diag[i] + 1;
691       nz  = ai[i+1] - a->diag[i] - 1;
692       sum = tmp[i];
693       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
694       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
695     }
696 
697     b += n;
698     x += n;
699   }
700   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
701   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
702   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
703   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
704   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 /* ----------------------------------------------------------- */
709 #undef __FUNCT__
710 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
711 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
712 {
713   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
714   PetscErrorCode ierr;
715   PetscInt       n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
716   PetscScalar    *x,*b,*aa = a->a;
717 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
718   PetscInt       adiag_i,i,*vi,nz,ai_i;
719   PetscScalar    *v,sum;
720 #endif
721 
722   PetscFunctionBegin;
723   if (!n) PetscFunctionReturn(0);
724 
725   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
726   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
727 
728 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
729   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
730 #else
731   /* forward solve the lower triangular */
732   x[0] = b[0];
733   for (i=1; i<n; i++) {
734     ai_i = ai[i];
735     v    = aa + ai_i;
736     vi   = aj + ai_i;
737     nz   = adiag[i] - ai_i;
738     sum  = b[i];
739     while (nz--) sum -= *v++ * x[*vi++];
740     x[i] = sum;
741   }
742 
743   /* backward solve the upper triangular */
744   for (i=n-1; i>=0; i--){
745     adiag_i = adiag[i];
746     v       = aa + adiag_i + 1;
747     vi      = aj + adiag_i + 1;
748     nz      = ai[i+1] - adiag_i - 1;
749     sum     = x[i];
750     while (nz--) sum -= *v++ * x[*vi++];
751     x[i]    = sum*aa[adiag_i];
752   }
753 #endif
754   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
755   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
756   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
757   PetscFunctionReturn(0);
758 }
759 
760 #undef __FUNCT__
761 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
762 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
763 {
764   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
765   IS             iscol = a->col,isrow = a->row;
766   PetscErrorCode ierr;
767   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
768   PetscInt       nz,*rout,*cout;
769   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
770 
771   PetscFunctionBegin;
772   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
773 
774   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
775   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
776   tmp  = a->solve_work;
777 
778   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
779   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
780 
781   /* forward solve the lower triangular */
782   tmp[0] = b[*r++];
783   for (i=1; i<n; i++) {
784     v   = aa + ai[i] ;
785     vi  = aj + ai[i] ;
786     nz  = a->diag[i] - ai[i];
787     sum = b[*r++];
788     while (nz--) sum -= *v++ * tmp[*vi++ ];
789     tmp[i] = sum;
790   }
791 
792   /* backward solve the upper triangular */
793   for (i=n-1; i>=0; i--){
794     v   = aa + a->diag[i] + 1;
795     vi  = aj + a->diag[i] + 1;
796     nz  = ai[i+1] - a->diag[i] - 1;
797     sum = tmp[i];
798     while (nz--) sum -= *v++ * tmp[*vi++ ];
799     tmp[i] = sum*aa[a->diag[i]];
800     x[*c--] += tmp[i];
801   }
802 
803   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
804   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
805   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
806   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
807   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
808 
809   PetscFunctionReturn(0);
810 }
811 /* -------------------------------------------------------------------*/
812 #undef __FUNCT__
813 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
814 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
815 {
816   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
817   IS             iscol = a->col,isrow = a->row;
818   PetscErrorCode ierr;
819   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
820   PetscInt       nz,*rout,*cout,*diag = a->diag;
821   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
822 
823   PetscFunctionBegin;
824   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
825   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
826   tmp  = a->solve_work;
827 
828   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
829   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
830 
831   /* copy the b into temp work space according to permutation */
832   for (i=0; i<n; i++) tmp[i] = b[c[i]];
833 
834   /* forward solve the U^T */
835   for (i=0; i<n; i++) {
836     v   = aa + diag[i] ;
837     vi  = aj + diag[i] + 1;
838     nz  = ai[i+1] - diag[i] - 1;
839     s1  = tmp[i];
840     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
841     while (nz--) {
842       tmp[*vi++ ] -= (*v++)*s1;
843     }
844     tmp[i] = s1;
845   }
846 
847   /* backward solve the L^T */
848   for (i=n-1; i>=0; i--){
849     v   = aa + diag[i] - 1 ;
850     vi  = aj + diag[i] - 1 ;
851     nz  = diag[i] - ai[i];
852     s1  = tmp[i];
853     while (nz--) {
854       tmp[*vi-- ] -= (*v--)*s1;
855     }
856   }
857 
858   /* copy tmp into x according to permutation */
859   for (i=0; i<n; i++) x[r[i]] = tmp[i];
860 
861   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
862   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
863   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
864   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
865 
866   ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
870 #undef __FUNCT__
871 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
872 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
873 {
874   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
875   IS             iscol = a->col,isrow = a->row;
876   PetscErrorCode ierr;
877   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
878   PetscInt       nz,*rout,*cout,*diag = a->diag;
879   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
880 
881   PetscFunctionBegin;
882   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
883 
884   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
885   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
886   tmp = a->solve_work;
887 
888   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
889   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
890 
891   /* copy the b into temp work space according to permutation */
892   for (i=0; i<n; i++) tmp[i] = b[c[i]];
893 
894   /* forward solve the U^T */
895   for (i=0; i<n; i++) {
896     v   = aa + diag[i] ;
897     vi  = aj + diag[i] + 1;
898     nz  = ai[i+1] - diag[i] - 1;
899     tmp[i] *= *v++;
900     while (nz--) {
901       tmp[*vi++ ] -= (*v++)*tmp[i];
902     }
903   }
904 
905   /* backward solve the L^T */
906   for (i=n-1; i>=0; i--){
907     v   = aa + diag[i] - 1 ;
908     vi  = aj + diag[i] - 1 ;
909     nz  = diag[i] - ai[i];
910     while (nz--) {
911       tmp[*vi-- ] -= (*v--)*tmp[i];
912     }
913   }
914 
915   /* copy tmp into x according to permutation */
916   for (i=0; i<n; i++) x[r[i]] += tmp[i];
917 
918   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
919   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
920   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
921   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
922 
923   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
924   PetscFunctionReturn(0);
925 }
926 /* ----------------------------------------------------------------*/
927 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
931 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
932 {
933   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
934   IS                 isicol;
935   PetscErrorCode     ierr;
936   PetscInt           *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j;
937   PetscInt           *bi,*cols,nnz,*cols_lvl;
938   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
939   PetscInt           i,levels,diagonal_fill;
940   PetscTruth         col_identity,row_identity;
941   PetscReal          f;
942   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
943   PetscBT            lnkbt;
944   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
945   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
946   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
947 
948   PetscFunctionBegin;
949   f             = info->fill;
950   levels        = (PetscInt)info->levels;
951   diagonal_fill = (PetscInt)info->diagonal_fill;
952   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
953 
954   /* special case that simply copies fill pattern */
955   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
956   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
957   if (!levels && row_identity && col_identity) {
958     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
959     (*fact)->factor                 = FACTOR_LU;
960     (*fact)->info.factor_mallocs    = 0;
961     (*fact)->info.fill_ratio_given  = info->fill;
962     (*fact)->info.fill_ratio_needed = 1.0;
963     b               = (Mat_SeqAIJ*)(*fact)->data;
964     if (!b->diag) {
965       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
966     }
967     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
968     b->row              = isrow;
969     b->col              = iscol;
970     b->icol             = isicol;
971     ierr                = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
972     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
973     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
974     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
975     PetscFunctionReturn(0);
976   }
977 
978   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
979   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
980 
981   /* get new row pointers */
982   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
983   bi[0] = 0;
984   /* bdiag is location of diagonal in factor */
985   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
986   bdiag[0]  = 0;
987 
988   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
989   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
990 
991   /* create a linked list for storing column indices of the active row */
992   nlnk = n + 1;
993   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
994 
995   /* initial FreeSpace size is f*(ai[n]+1) */
996   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
997   current_space = free_space;
998   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
999   current_space_lvl = free_space_lvl;
1000 
1001   for (i=0; i<n; i++) {
1002     nzi = 0;
1003     /* copy current row into linked list */
1004     nnz  = ai[r[i]+1] - ai[r[i]];
1005     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1006     cols = aj + ai[r[i]];
1007     lnk[i] = -1; /* marker to indicate if diagonal exists */
1008     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1009     nzi += nlnk;
1010 
1011     /* make sure diagonal entry is included */
1012     if (diagonal_fill && lnk[i] == -1) {
1013       fm = n;
1014       while (lnk[fm] < i) fm = lnk[fm];
1015       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1016       lnk[fm]    = i;
1017       lnk_lvl[i] = 0;
1018       nzi++; dcount++;
1019     }
1020 
1021     /* add pivot rows into the active row */
1022     nzbd = 0;
1023     prow = lnk[n];
1024     while (prow < i) {
1025       nnz      = bdiag[prow];
1026       cols     = bj_ptr[prow] + nnz + 1;
1027       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1028       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1029       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1030       nzi += nlnk;
1031       prow = lnk[prow];
1032       nzbd++;
1033     }
1034     bdiag[i] = nzbd;
1035     bi[i+1]  = bi[i] + nzi;
1036 
1037     /* if free space is not available, make more free space */
1038     if (current_space->local_remaining<nzi) {
1039       nnz = nzi*(n - i); /* estimated and max additional space needed */
1040       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1041       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1042       reallocs++;
1043     }
1044 
1045     /* copy data into free_space and free_space_lvl, then initialize lnk */
1046     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1047     bj_ptr[i]    = current_space->array;
1048     bjlvl_ptr[i] = current_space_lvl->array;
1049 
1050     /* make sure the active row i has diagonal entry */
1051     if (*(bj_ptr[i]+bdiag[i]) != i) {
1052       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1053     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1054     }
1055 
1056     current_space->array           += nzi;
1057     current_space->local_used      += nzi;
1058     current_space->local_remaining -= nzi;
1059     current_space_lvl->array           += nzi;
1060     current_space_lvl->local_used      += nzi;
1061     current_space_lvl->local_remaining -= nzi;
1062   }
1063 
1064   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1065   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1066 
1067   /* destroy list of free space and other temporary arrays */
1068   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1069   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1070   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1071   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1072   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1073 
1074 #if defined(PETSC_USE_INFO)
1075   {
1076     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1077     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1078     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1079     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1080     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1081     if (diagonal_fill) {
1082       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1083     }
1084   }
1085 #endif
1086 
1087   /* put together the new matrix */
1088   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
1089   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
1090   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1091   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1092   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1093   b = (Mat_SeqAIJ*)(*fact)->data;
1094   b->free_a       = PETSC_TRUE;
1095   b->free_ij      = PETSC_TRUE;
1096   b->singlemalloc = PETSC_FALSE;
1097   len = (bi[n] )*sizeof(PetscScalar);
1098   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1099   b->j          = bj;
1100   b->i          = bi;
1101   for (i=0; i<n; i++) bdiag[i] += bi[i];
1102   b->diag       = bdiag;
1103   b->ilen       = 0;
1104   b->imax       = 0;
1105   b->row        = isrow;
1106   b->col        = iscol;
1107   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1108   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1109   b->icol       = isicol;
1110   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1111   /* In b structure:  Free imax, ilen, old a, old j.
1112      Allocate bdiag, solve_work, new a, new j */
1113   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1114   b->maxnz             = b->nz = bi[n] ;
1115   (*fact)->factor = FACTOR_LU;
1116   (*fact)->info.factor_mallocs    = reallocs;
1117   (*fact)->info.fill_ratio_given  = f;
1118   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1119 
1120   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1121   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1122 
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #include "src/mat/impls/sbaij/seq/sbaij.h"
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1129 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1130 {
1131   Mat            C = *B;
1132   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1133   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1134   IS             ip=b->row;
1135   PetscErrorCode ierr;
1136   PetscInt       *rip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1137   PetscInt       *ai=a->i,*aj=a->j;
1138   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1139   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1140   PetscReal      zeropivot,rs,shiftnz;
1141   PetscReal      shiftpd;
1142   ChShift_Ctx    sctx;
1143   PetscInt       newshift;
1144 
1145   PetscFunctionBegin;
1146   shiftnz   = info->shiftnz;
1147   shiftpd   = info->shiftpd;
1148   zeropivot = info->zeropivot;
1149 
1150   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1151 
1152   /* initialization */
1153   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1154   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1155   jl   = il + mbs;
1156   rtmp = (MatScalar*)(jl + mbs);
1157 
1158   sctx.shift_amount = 0;
1159   sctx.nshift       = 0;
1160   do {
1161     sctx.chshift = PETSC_FALSE;
1162     for (i=0; i<mbs; i++) {
1163       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1164     }
1165 
1166     for (k = 0; k<mbs; k++){
1167       bval = ba + bi[k];
1168       /* initialize k-th row by the perm[k]-th row of A */
1169       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1170       for (j = jmin; j < jmax; j++){
1171         col = rip[aj[j]];
1172         if (col >= k){ /* only take upper triangular entry */
1173           rtmp[col] = aa[j];
1174           *bval++  = 0.0; /* for in-place factorization */
1175         }
1176       }
1177       /* shift the diagonal of the matrix */
1178       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1179 
1180       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1181       dk = rtmp[k];
1182       i = jl[k]; /* first row to be added to k_th row  */
1183 
1184       while (i < k){
1185         nexti = jl[i]; /* next row to be added to k_th row */
1186 
1187         /* compute multiplier, update diag(k) and U(i,k) */
1188         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1189         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1190         dk += uikdi*ba[ili];
1191         ba[ili] = uikdi; /* -U(i,k) */
1192 
1193         /* add multiple of row i to k-th row */
1194         jmin = ili + 1; jmax = bi[i+1];
1195         if (jmin < jmax){
1196           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1197           /* update il and jl for row i */
1198           il[i] = jmin;
1199           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1200         }
1201         i = nexti;
1202       }
1203 
1204       /* shift the diagonals when zero pivot is detected */
1205       /* compute rs=sum of abs(off-diagonal) */
1206       rs   = 0.0;
1207       jmin = bi[k]+1;
1208       nz   = bi[k+1] - jmin;
1209       if (nz){
1210         bcol = bj + jmin;
1211         while (nz--){
1212           rs += PetscAbsScalar(rtmp[*bcol]);
1213           bcol++;
1214         }
1215       }
1216 
1217       sctx.rs = rs;
1218       sctx.pv = dk;
1219       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1220       if (newshift == 1) break;
1221 
1222       /* copy data into U(k,:) */
1223       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1224       jmin = bi[k]+1; jmax = bi[k+1];
1225       if (jmin < jmax) {
1226         for (j=jmin; j<jmax; j++){
1227           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1228         }
1229         /* add the k-th row into il and jl */
1230         il[k] = jmin;
1231         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1232       }
1233     }
1234   } while (sctx.chshift);
1235   ierr = PetscFree(il);CHKERRQ(ierr);
1236 
1237   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1238   C->factor       = FACTOR_CHOLESKY;
1239   C->assembled    = PETSC_TRUE;
1240   C->preallocated = PETSC_TRUE;
1241   ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr);
1242   if (sctx.nshift){
1243     if (shiftnz) {
1244       ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1245     } else if (shiftpd) {
1246       ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1247     }
1248   }
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1254 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1255 {
1256   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1257   Mat_SeqSBAIJ       *b;
1258   Mat                B;
1259   PetscErrorCode     ierr;
1260   PetscTruth         perm_identity;
1261   PetscInt           reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1262   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1263   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1264   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1265   PetscReal          fill=info->fill,levels=info->levels;
1266   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1267   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1268   PetscBT            lnkbt;
1269 
1270   PetscFunctionBegin;
1271   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1272   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1273 
1274   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1275   ui[0] = 0;
1276 
1277   /* special case that simply copies fill pattern */
1278   if (!levels && perm_identity) {
1279     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1280     for (i=0; i<am; i++) {
1281       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1282     }
1283     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1284     cols = uj;
1285     for (i=0; i<am; i++) {
1286       aj    = a->j + a->diag[i];
1287       ncols = ui[i+1] - ui[i];
1288       for (j=0; j<ncols; j++) *cols++ = *aj++;
1289     }
1290   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1291     /* initialization */
1292     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1293 
1294     /* jl: linked list for storing indices of the pivot rows
1295        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1296     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1297     il         = jl + am;
1298     uj_ptr     = (PetscInt**)(il + am);
1299     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1300     for (i=0; i<am; i++){
1301       jl[i] = am; il[i] = 0;
1302     }
1303 
1304     /* create and initialize a linked list for storing column indices of the active row k */
1305     nlnk = am + 1;
1306     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1307 
1308     /* initial FreeSpace size is fill*(ai[am]+1) */
1309     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1310     current_space = free_space;
1311     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1312     current_space_lvl = free_space_lvl;
1313 
1314     for (k=0; k<am; k++){  /* for each active row k */
1315       /* initialize lnk by the column indices of row rip[k] of A */
1316       nzk   = 0;
1317       ncols = ai[rip[k]+1] - ai[rip[k]];
1318       ncols_upper = 0;
1319       for (j=0; j<ncols; j++){
1320         i = *(aj + ai[rip[k]] + j);
1321         if (rip[i] >= k){ /* only take upper triangular entry */
1322           ajtmp[ncols_upper] = i;
1323           ncols_upper++;
1324         }
1325       }
1326       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1327       nzk += nlnk;
1328 
1329       /* update lnk by computing fill-in for each pivot row to be merged in */
1330       prow = jl[k]; /* 1st pivot row */
1331 
1332       while (prow < k){
1333         nextprow = jl[prow];
1334 
1335         /* merge prow into k-th row */
1336         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1337         jmax = ui[prow+1];
1338         ncols = jmax-jmin;
1339         i     = jmin - ui[prow];
1340         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1341         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1342         j     = *(uj - 1);
1343         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1344         nzk += nlnk;
1345 
1346         /* update il and jl for prow */
1347         if (jmin < jmax){
1348           il[prow] = jmin;
1349           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1350         }
1351         prow = nextprow;
1352       }
1353 
1354       /* if free space is not available, make more free space */
1355       if (current_space->local_remaining<nzk) {
1356         i = am - k + 1; /* num of unfactored rows */
1357         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1358         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1359         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1360         reallocs++;
1361       }
1362 
1363       /* copy data into free_space and free_space_lvl, then initialize lnk */
1364       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1365 
1366       /* add the k-th row into il and jl */
1367       if (nzk > 1){
1368         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1369         jl[k] = jl[i]; jl[i] = k;
1370         il[k] = ui[k] + 1;
1371       }
1372       uj_ptr[k]     = current_space->array;
1373       uj_lvl_ptr[k] = current_space_lvl->array;
1374 
1375       current_space->array           += nzk;
1376       current_space->local_used      += nzk;
1377       current_space->local_remaining -= nzk;
1378 
1379       current_space_lvl->array           += nzk;
1380       current_space_lvl->local_used      += nzk;
1381       current_space_lvl->local_remaining -= nzk;
1382 
1383       ui[k+1] = ui[k] + nzk;
1384     }
1385 
1386 #if defined(PETSC_USE_INFO)
1387     if (ai[am] != 0) {
1388       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1389       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1390       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1391       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1392     } else {
1393       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1394     }
1395 #endif
1396 
1397     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1398     ierr = PetscFree(jl);CHKERRQ(ierr);
1399     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1400 
1401     /* destroy list of free space and other temporary array(s) */
1402     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1403     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1404     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1405     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1406 
1407   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1408 
1409   /* put together the new matrix in MATSEQSBAIJ format */
1410   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1411   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1412   B = *fact;
1413   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1414   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1415 
1416   b    = (Mat_SeqSBAIJ*)B->data;
1417   b->singlemalloc = PETSC_FALSE;
1418   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1419   b->j    = uj;
1420   b->i    = ui;
1421   b->diag = 0;
1422   b->ilen = 0;
1423   b->imax = 0;
1424   b->row  = perm;
1425   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1426   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1427   b->icol = perm;
1428   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1429   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1430   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1431   b->maxnz = b->nz = ui[am];
1432   b->free_a  = PETSC_TRUE;
1433   b->free_ij = PETSC_TRUE;
1434 
1435   B->factor                 = FACTOR_CHOLESKY;
1436   B->info.factor_mallocs    = reallocs;
1437   B->info.fill_ratio_given  = fill;
1438   if (ai[am] != 0) {
1439     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1440   } else {
1441     B->info.fill_ratio_needed = 0.0;
1442   }
1443   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1444   if (perm_identity){
1445     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1446     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1447   }
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1453 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1454 {
1455   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1456   Mat_SeqSBAIJ       *b;
1457   Mat                B;
1458   PetscErrorCode     ierr;
1459   PetscTruth         perm_identity;
1460   PetscReal          fill = info->fill;
1461   PetscInt           *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1462   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1463   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1464   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1465   PetscBT            lnkbt;
1466   IS                 iperm;
1467 
1468   PetscFunctionBegin;
1469   /* check whether perm is the identity mapping */
1470   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1471   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1472 
1473   if (!perm_identity){
1474     /* check if perm is symmetric! */
1475     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1476     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1477     for (i=0; i<am; i++) {
1478       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1479     }
1480     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1481     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1482   }
1483 
1484   /* initialization */
1485   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1486   ui[0] = 0;
1487 
1488   /* jl: linked list for storing indices of the pivot rows
1489      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1490   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1491   il     = jl + am;
1492   cols   = il + am;
1493   ui_ptr = (PetscInt**)(cols + am);
1494   for (i=0; i<am; i++){
1495     jl[i] = am; il[i] = 0;
1496   }
1497 
1498   /* create and initialize a linked list for storing column indices of the active row k */
1499   nlnk = am + 1;
1500   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1501 
1502   /* initial FreeSpace size is fill*(ai[am]+1) */
1503   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1504   current_space = free_space;
1505 
1506   for (k=0; k<am; k++){  /* for each active row k */
1507     /* initialize lnk by the column indices of row rip[k] of A */
1508     nzk   = 0;
1509     ncols = ai[rip[k]+1] - ai[rip[k]];
1510     ncols_upper = 0;
1511     for (j=0; j<ncols; j++){
1512       i = rip[*(aj + ai[rip[k]] + j)];
1513       if (i >= k){ /* only take upper triangular entry */
1514         cols[ncols_upper] = i;
1515         ncols_upper++;
1516       }
1517     }
1518     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1519     nzk += nlnk;
1520 
1521     /* update lnk by computing fill-in for each pivot row to be merged in */
1522     prow = jl[k]; /* 1st pivot row */
1523 
1524     while (prow < k){
1525       nextprow = jl[prow];
1526       /* merge prow into k-th row */
1527       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1528       jmax = ui[prow+1];
1529       ncols = jmax-jmin;
1530       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1531       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1532       nzk += nlnk;
1533 
1534       /* update il and jl for prow */
1535       if (jmin < jmax){
1536         il[prow] = jmin;
1537         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1538       }
1539       prow = nextprow;
1540     }
1541 
1542     /* if free space is not available, make more free space */
1543     if (current_space->local_remaining<nzk) {
1544       i = am - k + 1; /* num of unfactored rows */
1545       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1546       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1547       reallocs++;
1548     }
1549 
1550     /* copy data into free space, then initialize lnk */
1551     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1552 
1553     /* add the k-th row into il and jl */
1554     if (nzk-1 > 0){
1555       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1556       jl[k] = jl[i]; jl[i] = k;
1557       il[k] = ui[k] + 1;
1558     }
1559     ui_ptr[k] = current_space->array;
1560     current_space->array           += nzk;
1561     current_space->local_used      += nzk;
1562     current_space->local_remaining -= nzk;
1563 
1564     ui[k+1] = ui[k] + nzk;
1565   }
1566 
1567 #if defined(PETSC_USE_INFO)
1568   if (ai[am] != 0) {
1569     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1570     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1571     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1572     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1573   } else {
1574      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1575   }
1576 #endif
1577 
1578   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1579   ierr = PetscFree(jl);CHKERRQ(ierr);
1580 
1581   /* destroy list of free space and other temporary array(s) */
1582   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1583   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1584   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1585 
1586   /* put together the new matrix in MATSEQSBAIJ format */
1587   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1588   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1589   B    = *fact;
1590   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1591   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1592 
1593   b = (Mat_SeqSBAIJ*)B->data;
1594   b->singlemalloc = PETSC_FALSE;
1595   b->free_a       = PETSC_TRUE;
1596   b->free_ij      = PETSC_TRUE;
1597   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1598   b->j    = uj;
1599   b->i    = ui;
1600   b->diag = 0;
1601   b->ilen = 0;
1602   b->imax = 0;
1603   b->row  = perm;
1604   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1605   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1606   b->icol = perm;
1607   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1608   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1609   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1610   b->maxnz = b->nz = ui[am];
1611 
1612   B->factor                 = FACTOR_CHOLESKY;
1613   B->info.factor_mallocs    = reallocs;
1614   B->info.fill_ratio_given  = fill;
1615   if (ai[am] != 0) {
1616     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1617   } else {
1618     B->info.fill_ratio_needed = 0.0;
1619   }
1620   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1621   if (perm_identity){
1622     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1623     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1624   }
1625   PetscFunctionReturn(0);
1626 }
1627