xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 7d6bfa3b9d7db0ccd4cc481237114ca8dbb0dbff)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/seq/aij.h"
4 #include "../src/inline/dot.h"
5 #include "../src/inline/spops.h"
6 #include "petscbt.h"
7 #include "../src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*);
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   const PetscInt *c,*r,*ic;
64   PetscInt       i,n = A->rmap->n,*cc,*cr;
65   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
66   PetscInt       *ordcol,*iwk,*iperm,*jw;
67   PetscInt       jmax,lfill,job,*o_i,*o_j;
68   MatScalar      *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
69   PetscReal      af,dt,dtcount,dtcol,fill;
70 
71   PetscFunctionBegin;
72 
73   if (info->dt == PETSC_DEFAULT)      dt      = .005; else dt = info->dt;
74   if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);  else dtcount = info->dtcount;
75   if (info->dtcol == PETSC_DEFAULT)   dtcol   = .01; else dtcol = info->dtcol;
76   if (info->fill == PETSC_DEFAULT)    fill    = ((double)(n*(dtcount+1)))/a->nz; else fill = info->fill;
77   lfill   = (PetscInt)(dtcount/2.0);
78   jmax    = (PetscInt)(fill*a->nz);
79 
80 
81   /* ------------------------------------------------------------
82      If reorder=.TRUE., then the original matrix has to be
83      reordered to reflect the user selected ordering scheme, and
84      then de-reordered so it is in it's original format.
85      Because Saad's dperm() is NOT in place, we have to copy
86      the original matrix and allocate more storage. . .
87      ------------------------------------------------------------
88   */
89 
90   /* set reorder to true if either isrow or iscol is not identity */
91   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
92   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
93   reorder = PetscNot(reorder);
94 
95 
96   /* storage for ilu factor */
97   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr);
100   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
101 
102   /* ------------------------------------------------------------
103      Make sure that everything is Fortran formatted (1-Based)
104      ------------------------------------------------------------
105   */
106   for (i=old_i[0];i<old_i[n];i++) {
107     old_j[i]++;
108   }
109   for(i=0;i<n+1;i++) {
110     old_i[i]++;
111   };
112 
113 
114   if (reorder) {
115     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
116     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
117     ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       cr[i]  = r[i]+1;
120       cc[i]  = c[i]+1;
121     }
122     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
123     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
124     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
125     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
126     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr);
127     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job);
128     ierr = PetscFree2(cc,cr);CHKERRQ(ierr);
129     o_a = old_a2;
130     o_j = old_j2;
131     o_i = old_i2;
132   } else {
133     o_a = old_a;
134     o_j = old_j;
135     o_i = old_i;
136   }
137 
138   /* ------------------------------------------------------------
139      Call Saad's ilutp() routine to generate the factorization
140      ------------------------------------------------------------
141   */
142 
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
145   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
146 
147   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148   if (sierr) {
149     switch (sierr) {
150       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax);
151       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax);
152       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax);
155       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156     }
157   }
158 
159   ierr = PetscFree(w);CHKERRQ(ierr);
160   ierr = PetscFree(jw);CHKERRQ(ierr);
161 
162   /* ------------------------------------------------------------
163      Saad's routine gives the result in Modified Sparse Row (msr)
164      Convert to Compressed Sparse Row format (csr)
165      ------------------------------------------------------------
166   */
167 
168   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
170 
171   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172 
173   ierr = PetscFree(iwk);CHKERRQ(ierr);
174   ierr = PetscFree(wk);CHKERRQ(ierr);
175 
176   if (reorder) {
177     ierr = PetscFree(old_a2);CHKERRQ(ierr);
178     ierr = PetscFree(old_j2);CHKERRQ(ierr);
179     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180   } else {
181     /* fix permutation of old_j that the factorization introduced */
182     for (i=old_i[0]; i<old_i[n]; i++) {
183       old_j[i-1] = iperm[old_j[i-1]-1];
184     }
185   }
186 
187   /* get rid of the shift to indices starting at 1 */
188   for (i=0; i<n+1; i++) {
189     old_i[i]--;
190   }
191   for (i=old_i[0];i<old_i[n];i++) {
192     old_j[i]--;
193   }
194 
195   /* Make the factored matrix 0-based */
196   for (i=0; i<n+1; i++) {
197     new_i[i]--;
198   }
199   for (i=new_i[0];i<new_i[n];i++) {
200     new_j[i]--;
201   }
202 
203   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204   /*-- permute the right-hand-side and solution vectors           --*/
205   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
206   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
208   for(i=0; i<n; i++) {
209     ordcol[i] = ic[iperm[i]-1];
210   };
211   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
212   ierr = ISDestroy(isicol);CHKERRQ(ierr);
213 
214   ierr = PetscFree(iperm);CHKERRQ(ierr);
215 
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
217   ierr = PetscFree(ordcol);CHKERRQ(ierr);
218 
219   /*----- put together the new matrix -----*/
220 
221   ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr);
222   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
223   ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
224   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
225   (*fact)->factor    = MAT_FACTOR_LU;
226   (*fact)->assembled = PETSC_TRUE;
227 
228   b = (Mat_SeqAIJ*)(*fact)->data;
229   b->free_a        = PETSC_TRUE;
230   b->free_ij       = PETSC_TRUE;
231   b->singlemalloc  = PETSC_FALSE;
232   b->a             = new_a;
233   b->j             = new_j;
234   b->i             = new_i;
235   b->ilen          = 0;
236   b->imax          = 0;
237   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
238   b->row           = isirow;
239   b->col           = iscolf;
240   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
241   b->maxnz = b->nz = new_i[n];
242   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
243   (*fact)->info.factor_mallocs = 0;
244 
245   af = ((double)b->nz)/((double)a->nz) + .001;
246   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr);
247   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
248   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
249   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
250 
251   if (reorder) (*fact)->ops->solve = MatSolve_SeqAIJ;
252   else         (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
253   (*fact)->ops->solveadd           = MatSolveAdd_SeqAIJ;
254   (*fact)->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
255   (*fact)->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
256 
257   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
258 
259   PetscFunctionReturn(0);
260 #endif
261 }
262 
263 EXTERN_C_BEGIN
264 #undef __FUNCT__
265 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
266 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
267 {
268   PetscFunctionBegin;
269   *flg = PETSC_TRUE;
270   PetscFunctionReturn(0);
271 }
272 EXTERN_C_END
273 
274 EXTERN_C_BEGIN
275 #undef __FUNCT__
276 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
277 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
278 {
279   PetscInt           n = A->rmap->n;
280   PetscErrorCode     ierr;
281 
282   PetscFunctionBegin;
283   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
284   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
285   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
286     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
287     if (ftype == MAT_FACTOR_ILU) {
288       (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ;
289     } else {
290       (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
291     }
292   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
293     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
294     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
295     if (ftype == MAT_FACTOR_ICC) {
296       (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
297     } else {
298       (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
299     }
300   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
301   (*B)->factor = ftype;
302   PetscFunctionReturn(0);
303 }
304 EXTERN_C_END
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
308 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
309 {
310   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
311   IS                 isicol;
312   PetscErrorCode     ierr;
313   const PetscInt     *r,*ic;
314   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
315   PetscInt           *bi,*bj,*ajtmp;
316   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
317   PetscReal          f;
318   PetscInt           nlnk,*lnk,k,**bi_ptr;
319   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
320   PetscBT            lnkbt;
321 
322   PetscFunctionBegin;
323   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
324   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
325   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
326   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
327 
328   /* get new row pointers */
329   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
330   bi[0] = 0;
331 
332   /* bdiag is location of diagonal in factor */
333   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
334   bdiag[0] = 0;
335 
336   /* linked list for storing column indices of the active row */
337   nlnk = n + 1;
338   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
339 
340   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
341 
342   /* initial FreeSpace size is f*(ai[n]+1) */
343   f = info->fill;
344   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
345   current_space = free_space;
346 
347   for (i=0; i<n; i++) {
348     /* copy previous fill into linked list */
349     nzi = 0;
350     nnz = ai[r[i]+1] - ai[r[i]];
351     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
352     ajtmp = aj + ai[r[i]];
353     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
354     nzi += nlnk;
355 
356     /* add pivot rows into linked list */
357     row = lnk[n];
358     while (row < i) {
359       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
360       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
361       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
362       nzi += nlnk;
363       row  = lnk[row];
364     }
365     bi[i+1] = bi[i] + nzi;
366     im[i]   = nzi;
367 
368     /* mark bdiag */
369     nzbd = 0;
370     nnz  = nzi;
371     k    = lnk[n];
372     while (nnz-- && k < i){
373       nzbd++;
374       k = lnk[k];
375     }
376     bdiag[i] = bi[i] + nzbd;
377 
378     /* if free space is not available, make more free space */
379     if (current_space->local_remaining<nzi) {
380       nnz = (n - i)*nzi; /* estimated and max additional space needed */
381       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
382       reallocs++;
383     }
384 
385     /* copy data into free space, then initialize lnk */
386     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
387     bi_ptr[i] = current_space->array;
388     current_space->array           += nzi;
389     current_space->local_used      += nzi;
390     current_space->local_remaining -= nzi;
391   }
392 #if defined(PETSC_USE_INFO)
393   if (ai[n] != 0) {
394     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
395     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
396     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
397     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
398     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
399   } else {
400     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
401   }
402 #endif
403 
404   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
405   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
406 
407   /* destroy list of free space and other temporary array(s) */
408   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
409   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
410   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
411   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
412 
413   /* put together the new matrix */
414   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
415   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
416   b    = (Mat_SeqAIJ*)(B)->data;
417   b->free_a       = PETSC_TRUE;
418   b->free_ij      = PETSC_TRUE;
419   b->singlemalloc = PETSC_FALSE;
420   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
421   b->j          = bj;
422   b->i          = bi;
423   b->diag       = bdiag;
424   b->ilen       = 0;
425   b->imax       = 0;
426   b->row        = isrow;
427   b->col        = iscol;
428   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
429   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
430   b->icol       = isicol;
431   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
432 
433   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
434   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
435   b->maxnz = b->nz = bi[n] ;
436 
437   (B)->factor                 =  MAT_FACTOR_LU;
438   (B)->info.factor_mallocs    = reallocs;
439   (B)->info.fill_ratio_given  = f;
440 
441   if (ai[n] != 0) {
442     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
443   } else {
444     (B)->info.fill_ratio_needed = 0.0;
445   }
446   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
447   (B)->ops->solve            = MatSolve_SeqAIJ;
448   (B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
449   /* switch to inodes if appropriate */
450   ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 /*
455     Trouble in factorization, should we dump the original matrix?
456 */
457 #undef __FUNCT__
458 #define __FUNCT__ "MatFactorDumpMatrix"
459 PetscErrorCode MatFactorDumpMatrix(Mat A)
460 {
461   PetscErrorCode ierr;
462   PetscTruth     flg;
463 
464   PetscFunctionBegin;
465   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr);
466   if (flg) {
467     PetscViewer viewer;
468     char        filename[PETSC_MAX_PATH_LEN];
469 
470     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
471     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
472     ierr = MatView(A,viewer);CHKERRQ(ierr);
473     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
474   }
475   PetscFunctionReturn(0);
476 }
477 
478 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec);
479 
480 /* ----------------------------------------------------------- */
481 #undef __FUNCT__
482 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
483 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
484 {
485   Mat            C=B;
486   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
487   IS             isrow = b->row,isicol = b->icol;
488   PetscErrorCode ierr;
489   const PetscInt  *r,*ic,*ics;
490   PetscInt       i,j,n=A->rmap->n,*bi=b->i,*bj=b->j;
491   PetscInt       *ajtmp,*bjtmp,nz,row;
492   PetscInt       *diag_offset = b->diag,diag,*pj;
493   PetscScalar    *rtmp,*pc,multiplier,*rtmps;
494   MatScalar      *v,*pv;
495   PetscScalar    d;
496   PetscReal      rs;
497   LUShift_Ctx    sctx;
498   PetscInt       newshift,*ddiag;
499 
500   PetscFunctionBegin;
501   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
502   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
503   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
504   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
505   rtmps = rtmp; ics = ic;
506 
507   sctx.shift_top      = 0;
508   sctx.nshift_max     = 0;
509   sctx.shift_lo       = 0;
510   sctx.shift_hi       = 0;
511   sctx.shift_fraction = 0;
512 
513   /* if both shift schemes are chosen by user, only use info->shiftpd */
514   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
515     PetscInt *aai = a->i;
516     ddiag          = a->diag;
517     sctx.shift_top = 0;
518     for (i=0; i<n; i++) {
519       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
520       d  = (a->a)[ddiag[i]];
521       rs = -PetscAbsScalar(d) - PetscRealPart(d);
522       v  = a->a+aai[i];
523       nz = aai[i+1] - aai[i];
524       for (j=0; j<nz; j++)
525 	rs += PetscAbsScalar(v[j]);
526       if (rs>sctx.shift_top) sctx.shift_top = rs;
527     }
528     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
529     sctx.shift_top    *= 1.1;
530     sctx.nshift_max   = 5;
531     sctx.shift_lo     = 0.;
532     sctx.shift_hi     = 1.;
533   }
534 
535   sctx.shift_amount = 0;
536   sctx.nshift       = 0;
537   do {
538     sctx.lushift = PETSC_FALSE;
539     for (i=0; i<n; i++){
540       nz    = bi[i+1] - bi[i];
541       bjtmp = bj + bi[i];
542       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
543 
544       /* load in initial (unfactored row) */
545       nz    = a->i[r[i]+1] - a->i[r[i]];
546       ajtmp = a->j + a->i[r[i]];
547       v     = a->a + a->i[r[i]];
548       for (j=0; j<nz; j++) {
549         rtmp[ics[ajtmp[j]]] = v[j];
550       }
551       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
552 
553       row = *bjtmp++;
554       while  (row < i) {
555         pc = rtmp + row;
556         if (*pc != 0.0) {
557           pv         = b->a + diag_offset[row];
558           pj         = b->j + diag_offset[row] + 1;
559           multiplier = *pc / *pv++;
560           *pc        = multiplier;
561           nz         = bi[row+1] - diag_offset[row] - 1;
562           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
563           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
564         }
565         row = *bjtmp++;
566       }
567       /* finished row so stick it into b->a */
568       pv   = b->a + bi[i] ;
569       pj   = b->j + bi[i] ;
570       nz   = bi[i+1] - bi[i];
571       diag = diag_offset[i] - bi[i];
572       rs   = 0.0;
573       for (j=0; j<nz; j++) {
574         pv[j] = rtmps[pj[j]];
575         if (j != diag) rs += PetscAbsScalar(pv[j]);
576       }
577 
578       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
579       sctx.rs  = rs;
580       sctx.pv  = pv[diag];
581       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
582       if (newshift == 1) break;
583     }
584 
585     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
586       /*
587        * if no shift in this attempt & shifting & started shifting & can refine,
588        * then try lower shift
589        */
590       sctx.shift_hi        = sctx.shift_fraction;
591       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
592       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
593       sctx.lushift         = PETSC_TRUE;
594       sctx.nshift++;
595     }
596   } while (sctx.lushift);
597 
598   /* invert diagonal entries for simplier triangular solves */
599   for (i=0; i<n; i++) {
600     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
601   }
602   ierr = PetscFree(rtmp);CHKERRQ(ierr);
603   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
604   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
605   if (b->inode.use) {
606     C->ops->solve   = MatSolve_Inode;
607   } else {
608     PetscTruth row_identity, col_identity;
609     ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
610     ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
611     if (row_identity && col_identity) {
612       C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
613     } else {
614       C->ops->solve   = MatSolve_SeqAIJ;
615     }
616   }
617   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
618   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
619   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
620   C->ops->matsolve           = MatMatSolve_SeqAIJ;
621   C->assembled    = PETSC_TRUE;
622   C->preallocated = PETSC_TRUE;
623   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
624   if (sctx.nshift){
625      if (info->shiftpd) {
626       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
627     } else if (info->shiftnz) {
628       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 /*
635    This routine implements inplace ILU(0) with row or/and column permutations.
636    Input:
637      A - original matrix
638    Output;
639      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
640          a->j (col index) is permuted by the inverse of colperm, then sorted
641          a->a reordered accordingly with a->j
642          a->diag (ptr to diagonal elements) is updated.
643 */
644 #undef __FUNCT__
645 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
646 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
647 {
648   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
649   IS             isrow = a->row,isicol = a->icol;
650   PetscErrorCode ierr;
651   const PetscInt *r,*ic,*ics;
652   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
653   PetscInt       *ajtmp,nz,row;
654   PetscInt       *diag = a->diag,nbdiag,*pj;
655   PetscScalar    *rtmp,*pc,multiplier,d;
656   MatScalar      *v,*pv;
657   PetscReal      rs;
658   LUShift_Ctx    sctx;
659   PetscInt       newshift;
660 
661   PetscFunctionBegin;
662   if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
663   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
664   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
665   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
666   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
667   ics = ic;
668 
669   sctx.shift_top      = 0;
670   sctx.nshift_max     = 0;
671   sctx.shift_lo       = 0;
672   sctx.shift_hi       = 0;
673   sctx.shift_fraction = 0;
674 
675   /* if both shift schemes are chosen by user, only use info->shiftpd */
676   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
677     sctx.shift_top = 0;
678     for (i=0; i<n; i++) {
679       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
680       d  = (a->a)[diag[i]];
681       rs = -PetscAbsScalar(d) - PetscRealPart(d);
682       v  = a->a+ai[i];
683       nz = ai[i+1] - ai[i];
684       for (j=0; j<nz; j++)
685 	rs += PetscAbsScalar(v[j]);
686       if (rs>sctx.shift_top) sctx.shift_top = rs;
687     }
688     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
689     sctx.shift_top    *= 1.1;
690     sctx.nshift_max   = 5;
691     sctx.shift_lo     = 0.;
692     sctx.shift_hi     = 1.;
693   }
694 
695   sctx.shift_amount = 0;
696   sctx.nshift       = 0;
697   do {
698     sctx.lushift = PETSC_FALSE;
699     for (i=0; i<n; i++){
700       /* load in initial unfactored row */
701       nz    = ai[r[i]+1] - ai[r[i]];
702       ajtmp = aj + ai[r[i]];
703       v     = a->a + ai[r[i]];
704       /* sort permuted ajtmp and values v accordingly */
705       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
706       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
707 
708       diag[r[i]] = ai[r[i]];
709       for (j=0; j<nz; j++) {
710         rtmp[ajtmp[j]] = v[j];
711         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
712       }
713       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
714 
715       row = *ajtmp++;
716       while  (row < i) {
717         pc = rtmp + row;
718         if (*pc != 0.0) {
719           pv         = a->a + diag[r[row]];
720           pj         = aj + diag[r[row]] + 1;
721 
722           multiplier = *pc / *pv++;
723           *pc        = multiplier;
724           nz         = ai[r[row]+1] - diag[r[row]] - 1;
725           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
726           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
727         }
728         row = *ajtmp++;
729       }
730       /* finished row so overwrite it onto a->a */
731       pv   = a->a + ai[r[i]] ;
732       pj   = aj + ai[r[i]] ;
733       nz   = ai[r[i]+1] - ai[r[i]];
734       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
735 
736       rs   = 0.0;
737       for (j=0; j<nz; j++) {
738         pv[j] = rtmp[pj[j]];
739         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
740       }
741 
742       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
743       sctx.rs  = rs;
744       sctx.pv  = pv[nbdiag];
745       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
746       if (newshift == 1) break;
747     }
748 
749     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
750       /*
751        * if no shift in this attempt & shifting & started shifting & can refine,
752        * then try lower shift
753        */
754       sctx.shift_hi        = sctx.shift_fraction;
755       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
756       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
757       sctx.lushift         = PETSC_TRUE;
758       sctx.nshift++;
759     }
760   } while (sctx.lushift);
761 
762   /* invert diagonal entries for simplier triangular solves */
763   for (i=0; i<n; i++) {
764     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
765   }
766 
767   ierr = PetscFree(rtmp);CHKERRQ(ierr);
768   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
769   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
770   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
771   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
772   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
773   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
774   A->assembled = PETSC_TRUE;
775   A->preallocated = PETSC_TRUE;
776   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
777   if (sctx.nshift){
778     if (info->shiftpd) {
779       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
780     } else if (info->shiftnz) {
781       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
782     }
783   }
784   PetscFunctionReturn(0);
785 }
786 
787 /* ----------------------------------------------------------- */
788 #undef __FUNCT__
789 #define __FUNCT__ "MatLUFactor_SeqAIJ"
790 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
791 {
792   PetscErrorCode ierr;
793   Mat            C;
794 
795   PetscFunctionBegin;
796   ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
797   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
798   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
799   A->ops->solve            = C->ops->solve;
800   A->ops->solvetranspose   = C->ops->solvetranspose;
801   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
802   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
803   PetscFunctionReturn(0);
804 }
805 /* ----------------------------------------------------------- */
806 #undef __FUNCT__
807 #define __FUNCT__ "MatSolve_SeqAIJ"
808 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
809 {
810   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
811   IS                iscol = a->col,isrow = a->row;
812   PetscErrorCode    ierr;
813   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
814   PetscInt          nz;
815   const PetscInt    *rout,*cout,*r,*c;
816   PetscScalar       *x,*tmp,*tmps,sum;
817   const PetscScalar *b;
818   const MatScalar   *aa = a->a,*v;
819 
820   PetscFunctionBegin;
821   if (!n) PetscFunctionReturn(0);
822 
823   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
824   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
825   tmp  = a->solve_work;
826 
827   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
828   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
829 
830   /* forward solve the lower triangular */
831   tmp[0] = b[*r++];
832   tmps   = tmp;
833   for (i=1; i<n; i++) {
834     v   = aa + ai[i] ;
835     vi  = aj + ai[i] ;
836     nz  = a->diag[i] - ai[i];
837     sum = b[*r++];
838     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
839     tmp[i] = sum;
840   }
841 
842   /* backward solve the upper triangular */
843   for (i=n-1; i>=0; i--){
844     v   = aa + a->diag[i] + 1;
845     vi  = aj + a->diag[i] + 1;
846     nz  = ai[i+1] - a->diag[i] - 1;
847     sum = tmp[i];
848     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
849     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
850   }
851 
852   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
853   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
854   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
855   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
856   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
857   PetscFunctionReturn(0);
858 }
859 
860 #undef __FUNCT__
861 #define __FUNCT__ "MatMatSolve_SeqAIJ"
862 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
863 {
864   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
865   IS              iscol = a->col,isrow = a->row;
866   PetscErrorCode  ierr;
867   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
868   PetscInt        nz,neq;
869   const PetscInt  *rout,*cout,*r,*c;
870   PetscScalar     *x,*b,*tmp,*tmps,sum;
871   const MatScalar *aa = a->a,*v;
872   PetscTruth      bisdense,xisdense;
873 
874   PetscFunctionBegin;
875   if (!n) PetscFunctionReturn(0);
876 
877   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
878   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
879   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
880   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
881 
882   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
883   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
884 
885   tmp  = a->solve_work;
886   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
887   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
888 
889   for (neq=0; neq<B->cmap->n; neq++){
890     /* forward solve the lower triangular */
891     tmp[0] = b[r[0]];
892     tmps   = tmp;
893     for (i=1; i<n; i++) {
894       v   = aa + ai[i] ;
895       vi  = aj + ai[i] ;
896       nz  = a->diag[i] - ai[i];
897       sum = b[r[i]];
898       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
899       tmp[i] = sum;
900     }
901     /* backward solve the upper triangular */
902     for (i=n-1; i>=0; i--){
903       v   = aa + a->diag[i] + 1;
904       vi  = aj + a->diag[i] + 1;
905       nz  = ai[i+1] - a->diag[i] - 1;
906       sum = tmp[i];
907       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
908       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
909     }
910 
911     b += n;
912     x += n;
913   }
914   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
915   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
916   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
917   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
918   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
924 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
925 {
926   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
927   IS              iscol = a->col,isrow = a->row;
928   PetscErrorCode  ierr;
929   const PetscInt  *r,*c,*rout,*cout;
930   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
931   PetscInt        nz,row;
932   PetscScalar     *x,*b,*tmp,*tmps,sum;
933   const MatScalar *aa = a->a,*v;
934 
935   PetscFunctionBegin;
936   if (!n) PetscFunctionReturn(0);
937 
938   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
939   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
940   tmp  = a->solve_work;
941 
942   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
943   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
944 
945   /* forward solve the lower triangular */
946   tmp[0] = b[*r++];
947   tmps   = tmp;
948   for (row=1; row<n; row++) {
949     i   = rout[row]; /* permuted row */
950     v   = aa + ai[i] ;
951     vi  = aj + ai[i] ;
952     nz  = a->diag[i] - ai[i];
953     sum = b[*r++];
954     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
955     tmp[row] = sum;
956   }
957 
958   /* backward solve the upper triangular */
959   for (row=n-1; row>=0; row--){
960     i   = rout[row]; /* permuted row */
961     v   = aa + a->diag[i] + 1;
962     vi  = aj + a->diag[i] + 1;
963     nz  = ai[i+1] - a->diag[i] - 1;
964     sum = tmp[row];
965     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
966     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
967   }
968 
969   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
970   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
971   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
972   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
973   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 /* ----------------------------------------------------------- */
978 #undef __FUNCT__
979 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
980 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
981 {
982   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
983   PetscErrorCode    ierr;
984   PetscInt          n = A->rmap->n;
985   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
986   PetscScalar       *x;
987   const PetscScalar *b;
988   const MatScalar   *aa = a->a;
989 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
990   PetscInt          adiag_i,i,nz,ai_i;
991   const MatScalar   *v;
992   PetscScalar       sum;
993 #endif
994 
995   PetscFunctionBegin;
996   if (!n) PetscFunctionReturn(0);
997 
998   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
999   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1000 
1001 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1002   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1003 #else
1004   /* forward solve the lower triangular */
1005   x[0] = b[0];
1006   for (i=1; i<n; i++) {
1007     ai_i = ai[i];
1008     v    = aa + ai_i;
1009     vi   = aj + ai_i;
1010     nz   = adiag[i] - ai_i;
1011     sum  = b[i];
1012     while (nz--) sum -= *v++ * x[*vi++];
1013     x[i] = sum;
1014   }
1015 
1016   /* backward solve the upper triangular */
1017   for (i=n-1; i>=0; i--){
1018     adiag_i = adiag[i];
1019     v       = aa + adiag_i + 1;
1020     vi      = aj + adiag_i + 1;
1021     nz      = ai[i+1] - adiag_i - 1;
1022     sum     = x[i];
1023     while (nz--) sum -= *v++ * x[*vi++];
1024     x[i]    = sum*aa[adiag_i];
1025   }
1026 #endif
1027   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
1028   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1029   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1030   PetscFunctionReturn(0);
1031 }
1032 
1033 #undef __FUNCT__
1034 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1035 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1036 {
1037   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1038   IS              iscol = a->col,isrow = a->row;
1039   PetscErrorCode  ierr;
1040   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1041   PetscInt        nz;
1042   const PetscInt  *rout,*cout,*r,*c;
1043   PetscScalar     *x,*b,*tmp,sum;
1044   const MatScalar *aa = a->a,*v;
1045 
1046   PetscFunctionBegin;
1047   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1048 
1049   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1050   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1051   tmp  = a->solve_work;
1052 
1053   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1054   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1055 
1056   /* forward solve the lower triangular */
1057   tmp[0] = b[*r++];
1058   for (i=1; i<n; i++) {
1059     v   = aa + ai[i] ;
1060     vi  = aj + ai[i] ;
1061     nz  = a->diag[i] - ai[i];
1062     sum = b[*r++];
1063     while (nz--) sum -= *v++ * tmp[*vi++ ];
1064     tmp[i] = sum;
1065   }
1066 
1067   /* backward solve the upper triangular */
1068   for (i=n-1; i>=0; i--){
1069     v   = aa + a->diag[i] + 1;
1070     vi  = aj + a->diag[i] + 1;
1071     nz  = ai[i+1] - a->diag[i] - 1;
1072     sum = tmp[i];
1073     while (nz--) sum -= *v++ * tmp[*vi++ ];
1074     tmp[i] = sum*aa[a->diag[i]];
1075     x[*c--] += tmp[i];
1076   }
1077 
1078   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1079   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1080   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1081   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1082   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1083 
1084   PetscFunctionReturn(0);
1085 }
1086 /* -------------------------------------------------------------------*/
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1089 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1090 {
1091   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1092   IS              iscol = a->col,isrow = a->row;
1093   PetscErrorCode  ierr;
1094   const PetscInt  *rout,*cout,*r,*c;
1095   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1096   PetscInt        nz,*diag = a->diag;
1097   PetscScalar     *x,*b,*tmp,s1;
1098   const MatScalar *aa = a->a,*v;
1099 
1100   PetscFunctionBegin;
1101   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1102   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1103   tmp  = a->solve_work;
1104 
1105   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1106   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1107 
1108   /* copy the b into temp work space according to permutation */
1109   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1110 
1111   /* forward solve the U^T */
1112   for (i=0; i<n; i++) {
1113     v   = aa + diag[i] ;
1114     vi  = aj + diag[i] + 1;
1115     nz  = ai[i+1] - diag[i] - 1;
1116     s1  = tmp[i];
1117     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1118     while (nz--) {
1119       tmp[*vi++ ] -= (*v++)*s1;
1120     }
1121     tmp[i] = s1;
1122   }
1123 
1124   /* backward solve the L^T */
1125   for (i=n-1; i>=0; i--){
1126     v   = aa + diag[i] - 1 ;
1127     vi  = aj + diag[i] - 1 ;
1128     nz  = diag[i] - ai[i];
1129     s1  = tmp[i];
1130     while (nz--) {
1131       tmp[*vi-- ] -= (*v--)*s1;
1132     }
1133   }
1134 
1135   /* copy tmp into x according to permutation */
1136   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1137 
1138   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1139   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1140   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1141   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1142 
1143   ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1149 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1150 {
1151   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1152   IS              iscol = a->col,isrow = a->row;
1153   PetscErrorCode  ierr;
1154   const PetscInt  *r,*c,*rout,*cout;
1155   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1156   PetscInt        nz,*diag = a->diag;
1157   PetscScalar     *x,*b,*tmp;
1158   const MatScalar *aa = a->a,*v;
1159 
1160   PetscFunctionBegin;
1161   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1162 
1163   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1164   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1165   tmp = a->solve_work;
1166 
1167   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1168   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1169 
1170   /* copy the b into temp work space according to permutation */
1171   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1172 
1173   /* forward solve the U^T */
1174   for (i=0; i<n; i++) {
1175     v   = aa + diag[i] ;
1176     vi  = aj + diag[i] + 1;
1177     nz  = ai[i+1] - diag[i] - 1;
1178     tmp[i] *= *v++;
1179     while (nz--) {
1180       tmp[*vi++ ] -= (*v++)*tmp[i];
1181     }
1182   }
1183 
1184   /* backward solve the L^T */
1185   for (i=n-1; i>=0; i--){
1186     v   = aa + diag[i] - 1 ;
1187     vi  = aj + diag[i] - 1 ;
1188     nz  = diag[i] - ai[i];
1189     while (nz--) {
1190       tmp[*vi-- ] -= (*v--)*tmp[i];
1191     }
1192   }
1193 
1194   /* copy tmp into x according to permutation */
1195   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1196 
1197   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1198   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1199   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1200   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1201 
1202   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1203   PetscFunctionReturn(0);
1204 }
1205 /* ----------------------------------------------------------------*/
1206 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1207 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption);
1208 
1209 #undef __FUNCT__
1210 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1211 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1212 {
1213   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1214   IS                 isicol;
1215   PetscErrorCode     ierr;
1216   const PetscInt     *r,*ic;
1217   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1218   PetscInt           *bi,*cols,nnz,*cols_lvl;
1219   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1220   PetscInt           i,levels,diagonal_fill;
1221   PetscTruth         col_identity,row_identity;
1222   PetscReal          f;
1223   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1224   PetscBT            lnkbt;
1225   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1226   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1227   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1228   PetscTruth         missing;
1229 
1230   PetscFunctionBegin;
1231   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1232   f             = info->fill;
1233   levels        = (PetscInt)info->levels;
1234   diagonal_fill = (PetscInt)info->diagonal_fill;
1235   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1236 
1237   /* special case that simply copies fill pattern */
1238   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1239   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1240   if (!levels && row_identity && col_identity) {
1241     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr);
1242     fact->factor = MAT_FACTOR_ILU;
1243     (fact)->info.factor_mallocs    = 0;
1244     (fact)->info.fill_ratio_given  = info->fill;
1245     (fact)->info.fill_ratio_needed = 1.0;
1246     b               = (Mat_SeqAIJ*)(fact)->data;
1247     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1248     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1249     b->row              = isrow;
1250     b->col              = iscol;
1251     b->icol             = isicol;
1252     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1253     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1254     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1255     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1256     ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1257     PetscFunctionReturn(0);
1258   }
1259 
1260   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1261   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1262 
1263   /* get new row pointers */
1264   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1265   bi[0] = 0;
1266   /* bdiag is location of diagonal in factor */
1267   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1268   bdiag[0]  = 0;
1269 
1270   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1271   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1272 
1273   /* create a linked list for storing column indices of the active row */
1274   nlnk = n + 1;
1275   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1276 
1277   /* initial FreeSpace size is f*(ai[n]+1) */
1278   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1279   current_space = free_space;
1280   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1281   current_space_lvl = free_space_lvl;
1282 
1283   for (i=0; i<n; i++) {
1284     nzi = 0;
1285     /* copy current row into linked list */
1286     nnz  = ai[r[i]+1] - ai[r[i]];
1287     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1288     cols = aj + ai[r[i]];
1289     lnk[i] = -1; /* marker to indicate if diagonal exists */
1290     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1291     nzi += nlnk;
1292 
1293     /* make sure diagonal entry is included */
1294     if (diagonal_fill && lnk[i] == -1) {
1295       fm = n;
1296       while (lnk[fm] < i) fm = lnk[fm];
1297       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1298       lnk[fm]    = i;
1299       lnk_lvl[i] = 0;
1300       nzi++; dcount++;
1301     }
1302 
1303     /* add pivot rows into the active row */
1304     nzbd = 0;
1305     prow = lnk[n];
1306     while (prow < i) {
1307       nnz      = bdiag[prow];
1308       cols     = bj_ptr[prow] + nnz + 1;
1309       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1310       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1311       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1312       nzi += nlnk;
1313       prow = lnk[prow];
1314       nzbd++;
1315     }
1316     bdiag[i] = nzbd;
1317     bi[i+1]  = bi[i] + nzi;
1318 
1319     /* if free space is not available, make more free space */
1320     if (current_space->local_remaining<nzi) {
1321       nnz = nzi*(n - i); /* estimated and max additional space needed */
1322       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1323       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1324       reallocs++;
1325     }
1326 
1327     /* copy data into free_space and free_space_lvl, then initialize lnk */
1328     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1329     bj_ptr[i]    = current_space->array;
1330     bjlvl_ptr[i] = current_space_lvl->array;
1331 
1332     /* make sure the active row i has diagonal entry */
1333     if (*(bj_ptr[i]+bdiag[i]) != i) {
1334       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1335     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1336     }
1337 
1338     current_space->array           += nzi;
1339     current_space->local_used      += nzi;
1340     current_space->local_remaining -= nzi;
1341     current_space_lvl->array           += nzi;
1342     current_space_lvl->local_used      += nzi;
1343     current_space_lvl->local_remaining -= nzi;
1344   }
1345 
1346   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1347   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1348 
1349   /* destroy list of free space and other temporary arrays */
1350   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1351   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1352   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1353   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1354   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1355 
1356 #if defined(PETSC_USE_INFO)
1357   {
1358     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1359     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1360     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1361     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1362     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1363     if (diagonal_fill) {
1364       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1365     }
1366   }
1367 #endif
1368 
1369   /* put together the new matrix */
1370   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1371   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1372   b = (Mat_SeqAIJ*)(fact)->data;
1373   b->free_a       = PETSC_TRUE;
1374   b->free_ij      = PETSC_TRUE;
1375   b->singlemalloc = PETSC_FALSE;
1376   len = (bi[n] )*sizeof(PetscScalar);
1377   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1378   b->j          = bj;
1379   b->i          = bi;
1380   for (i=0; i<n; i++) bdiag[i] += bi[i];
1381   b->diag       = bdiag;
1382   b->ilen       = 0;
1383   b->imax       = 0;
1384   b->row        = isrow;
1385   b->col        = iscol;
1386   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1387   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1388   b->icol       = isicol;
1389   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1390   /* In b structure:  Free imax, ilen, old a, old j.
1391      Allocate bdiag, solve_work, new a, new j */
1392   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1393   b->maxnz             = b->nz = bi[n] ;
1394   (fact)->info.factor_mallocs    = reallocs;
1395   (fact)->info.fill_ratio_given  = f;
1396   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1397   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1398   ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 #include "../src/mat/impls/sbaij/seq/sbaij.h"
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1405 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
1406 {
1407   Mat            C = B;
1408   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1409   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1410   IS             ip=b->row,iip = b->icol;
1411   PetscErrorCode ierr;
1412   const PetscInt *rip,*riip;
1413   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1414   PetscInt       *ai=a->i,*aj=a->j;
1415   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1416   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1417   PetscReal      zeropivot,rs,shiftnz;
1418   PetscReal      shiftpd;
1419   ChShift_Ctx    sctx;
1420   PetscInt       newshift;
1421   PetscTruth     perm_identity;
1422 
1423   PetscFunctionBegin;
1424 
1425   shiftnz   = info->shiftnz;
1426   shiftpd   = info->shiftpd;
1427   zeropivot = info->zeropivot;
1428 
1429   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1430   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1431 
1432   /* initialization */
1433   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1434   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1435   jl   = il + mbs;
1436   rtmp = (MatScalar*)(jl + mbs);
1437 
1438   sctx.shift_amount = 0;
1439   sctx.nshift       = 0;
1440   do {
1441     sctx.chshift = PETSC_FALSE;
1442     for (i=0; i<mbs; i++) {
1443       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1444     }
1445 
1446     for (k = 0; k<mbs; k++){
1447       bval = ba + bi[k];
1448       /* initialize k-th row by the perm[k]-th row of A */
1449       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1450       for (j = jmin; j < jmax; j++){
1451         col = riip[aj[j]];
1452         if (col >= k){ /* only take upper triangular entry */
1453           rtmp[col] = aa[j];
1454           *bval++  = 0.0; /* for in-place factorization */
1455         }
1456       }
1457       /* shift the diagonal of the matrix */
1458       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1459 
1460       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1461       dk = rtmp[k];
1462       i = jl[k]; /* first row to be added to k_th row  */
1463 
1464       while (i < k){
1465         nexti = jl[i]; /* next row to be added to k_th row */
1466 
1467         /* compute multiplier, update diag(k) and U(i,k) */
1468         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1469         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1470         dk += uikdi*ba[ili];
1471         ba[ili] = uikdi; /* -U(i,k) */
1472 
1473         /* add multiple of row i to k-th row */
1474         jmin = ili + 1; jmax = bi[i+1];
1475         if (jmin < jmax){
1476           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1477           /* update il and jl for row i */
1478           il[i] = jmin;
1479           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1480         }
1481         i = nexti;
1482       }
1483 
1484       /* shift the diagonals when zero pivot is detected */
1485       /* compute rs=sum of abs(off-diagonal) */
1486       rs   = 0.0;
1487       jmin = bi[k]+1;
1488       nz   = bi[k+1] - jmin;
1489       bcol = bj + jmin;
1490       while (nz--){
1491         rs += PetscAbsScalar(rtmp[*bcol]);
1492         bcol++;
1493       }
1494 
1495       sctx.rs = rs;
1496       sctx.pv = dk;
1497       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1498 
1499       if (newshift == 1) {
1500         if (!sctx.shift_amount) {
1501           sctx.shift_amount = 1e-5;
1502         }
1503         break;
1504       }
1505 
1506       /* copy data into U(k,:) */
1507       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1508       jmin = bi[k]+1; jmax = bi[k+1];
1509       if (jmin < jmax) {
1510         for (j=jmin; j<jmax; j++){
1511           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1512         }
1513         /* add the k-th row into il and jl */
1514         il[k] = jmin;
1515         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1516       }
1517     }
1518   } while (sctx.chshift);
1519   ierr = PetscFree(il);CHKERRQ(ierr);
1520 
1521   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1522   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1523 
1524   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
1525   if (perm_identity){
1526     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1527     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1528     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1529     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1530   } else {
1531     (B)->ops->solve           = MatSolve_SeqSBAIJ_1;
1532     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1533     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1534     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1535   }
1536 
1537   C->assembled    = PETSC_TRUE;
1538   C->preallocated = PETSC_TRUE;
1539   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
1540   if (sctx.nshift){
1541     if (shiftnz) {
1542       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1543     } else if (shiftpd) {
1544       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1545     }
1546   }
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1552 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1553 {
1554   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1555   Mat_SeqSBAIJ       *b;
1556   PetscErrorCode     ierr;
1557   PetscTruth         perm_identity,missing;
1558   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1559   const PetscInt     *rip,*riip;
1560   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1561   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1562   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1563   PetscReal          fill=info->fill,levels=info->levels;
1564   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1565   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1566   PetscBT            lnkbt;
1567   IS                 iperm;
1568 
1569   PetscFunctionBegin;
1570   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1571   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1572   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1573   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1574   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1575 
1576   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1577   ui[0] = 0;
1578 
1579   /* ICC(0) without matrix ordering: simply copies fill pattern */
1580   if (!levels && perm_identity) {
1581 
1582     for (i=0; i<am; i++) {
1583       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1584     }
1585     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1586     cols = uj;
1587     for (i=0; i<am; i++) {
1588       aj    = a->j + a->diag[i];
1589       ncols = ui[i+1] - ui[i];
1590       for (j=0; j<ncols; j++) *cols++ = *aj++;
1591     }
1592   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1593     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1594     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1595 
1596     /* initialization */
1597     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1598 
1599     /* jl: linked list for storing indices of the pivot rows
1600        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1601     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1602     il         = jl + am;
1603     uj_ptr     = (PetscInt**)(il + am);
1604     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1605     for (i=0; i<am; i++){
1606       jl[i] = am; il[i] = 0;
1607     }
1608 
1609     /* create and initialize a linked list for storing column indices of the active row k */
1610     nlnk = am + 1;
1611     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1612 
1613     /* initial FreeSpace size is fill*(ai[am]+1) */
1614     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1615     current_space = free_space;
1616     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1617     current_space_lvl = free_space_lvl;
1618 
1619     for (k=0; k<am; k++){  /* for each active row k */
1620       /* initialize lnk by the column indices of row rip[k] of A */
1621       nzk   = 0;
1622       ncols = ai[rip[k]+1] - ai[rip[k]];
1623       if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1624       ncols_upper = 0;
1625       for (j=0; j<ncols; j++){
1626         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1627         if (riip[i] >= k){ /* only take upper triangular entry */
1628           ajtmp[ncols_upper] = i;
1629           ncols_upper++;
1630         }
1631       }
1632       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1633       nzk += nlnk;
1634 
1635       /* update lnk by computing fill-in for each pivot row to be merged in */
1636       prow = jl[k]; /* 1st pivot row */
1637 
1638       while (prow < k){
1639         nextprow = jl[prow];
1640 
1641         /* merge prow into k-th row */
1642         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1643         jmax = ui[prow+1];
1644         ncols = jmax-jmin;
1645         i     = jmin - ui[prow];
1646         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1647         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1648         j     = *(uj - 1);
1649         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1650         nzk += nlnk;
1651 
1652         /* update il and jl for prow */
1653         if (jmin < jmax){
1654           il[prow] = jmin;
1655           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1656         }
1657         prow = nextprow;
1658       }
1659 
1660       /* if free space is not available, make more free space */
1661       if (current_space->local_remaining<nzk) {
1662         i = am - k + 1; /* num of unfactored rows */
1663         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1664         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1665         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1666         reallocs++;
1667       }
1668 
1669       /* copy data into free_space and free_space_lvl, then initialize lnk */
1670       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1671       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1672 
1673       /* add the k-th row into il and jl */
1674       if (nzk > 1){
1675         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1676         jl[k] = jl[i]; jl[i] = k;
1677         il[k] = ui[k] + 1;
1678       }
1679       uj_ptr[k]     = current_space->array;
1680       uj_lvl_ptr[k] = current_space_lvl->array;
1681 
1682       current_space->array           += nzk;
1683       current_space->local_used      += nzk;
1684       current_space->local_remaining -= nzk;
1685 
1686       current_space_lvl->array           += nzk;
1687       current_space_lvl->local_used      += nzk;
1688       current_space_lvl->local_remaining -= nzk;
1689 
1690       ui[k+1] = ui[k] + nzk;
1691     }
1692 
1693 #if defined(PETSC_USE_INFO)
1694     if (ai[am] != 0) {
1695       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1696       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1697       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1698       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1699     } else {
1700       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1701     }
1702 #endif
1703 
1704     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1705     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1706     ierr = PetscFree(jl);CHKERRQ(ierr);
1707     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1708 
1709     /* destroy list of free space and other temporary array(s) */
1710     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1711     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1712     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1713     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1714 
1715   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1716 
1717   /* put together the new matrix in MATSEQSBAIJ format */
1718 
1719   b    = (Mat_SeqSBAIJ*)(fact)->data;
1720   b->singlemalloc = PETSC_FALSE;
1721   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1722   b->j    = uj;
1723   b->i    = ui;
1724   b->diag = 0;
1725   b->ilen = 0;
1726   b->imax = 0;
1727   b->row  = perm;
1728   b->col  = perm;
1729   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1730   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1731   b->icol = iperm;
1732   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1733   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1734   ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1735   b->maxnz   = b->nz = ui[am];
1736   b->free_a  = PETSC_TRUE;
1737   b->free_ij = PETSC_TRUE;
1738 
1739   (fact)->info.factor_mallocs    = reallocs;
1740   (fact)->info.fill_ratio_given  = fill;
1741   if (ai[am] != 0) {
1742     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1743   } else {
1744     (fact)->info.fill_ratio_needed = 0.0;
1745   }
1746   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1752 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1753 {
1754   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1755   Mat_SeqSBAIJ       *b;
1756   PetscErrorCode     ierr;
1757   PetscTruth         perm_identity;
1758   PetscReal          fill = info->fill;
1759   const PetscInt     *rip,*riip;
1760   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1761   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1762   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1763   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1764   PetscBT            lnkbt;
1765   IS                 iperm;
1766 
1767   PetscFunctionBegin;
1768   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1769   /* check whether perm is the identity mapping */
1770   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1771   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1772   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1773   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1774 
1775   /* initialization */
1776   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1777   ui[0] = 0;
1778 
1779   /* jl: linked list for storing indices of the pivot rows
1780      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1781   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1782   il     = jl + am;
1783   cols   = il + am;
1784   ui_ptr = (PetscInt**)(cols + am);
1785   for (i=0; i<am; i++){
1786     jl[i] = am; il[i] = 0;
1787   }
1788 
1789   /* create and initialize a linked list for storing column indices of the active row k */
1790   nlnk = am + 1;
1791   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1792 
1793   /* initial FreeSpace size is fill*(ai[am]+1) */
1794   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1795   current_space = free_space;
1796 
1797   for (k=0; k<am; k++){  /* for each active row k */
1798     /* initialize lnk by the column indices of row rip[k] of A */
1799     nzk   = 0;
1800     ncols = ai[rip[k]+1] - ai[rip[k]];
1801     if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1802     ncols_upper = 0;
1803     for (j=0; j<ncols; j++){
1804       i = riip[*(aj + ai[rip[k]] + j)];
1805       if (i >= k){ /* only take upper triangular entry */
1806         cols[ncols_upper] = i;
1807         ncols_upper++;
1808       }
1809     }
1810     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1811     nzk += nlnk;
1812 
1813     /* update lnk by computing fill-in for each pivot row to be merged in */
1814     prow = jl[k]; /* 1st pivot row */
1815 
1816     while (prow < k){
1817       nextprow = jl[prow];
1818       /* merge prow into k-th row */
1819       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1820       jmax = ui[prow+1];
1821       ncols = jmax-jmin;
1822       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1823       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1824       nzk += nlnk;
1825 
1826       /* update il and jl for prow */
1827       if (jmin < jmax){
1828         il[prow] = jmin;
1829         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1830       }
1831       prow = nextprow;
1832     }
1833 
1834     /* if free space is not available, make more free space */
1835     if (current_space->local_remaining<nzk) {
1836       i = am - k + 1; /* num of unfactored rows */
1837       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1838       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1839       reallocs++;
1840     }
1841 
1842     /* copy data into free space, then initialize lnk */
1843     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1844 
1845     /* add the k-th row into il and jl */
1846     if (nzk-1 > 0){
1847       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1848       jl[k] = jl[i]; jl[i] = k;
1849       il[k] = ui[k] + 1;
1850     }
1851     ui_ptr[k] = current_space->array;
1852     current_space->array           += nzk;
1853     current_space->local_used      += nzk;
1854     current_space->local_remaining -= nzk;
1855 
1856     ui[k+1] = ui[k] + nzk;
1857   }
1858 
1859 #if defined(PETSC_USE_INFO)
1860   if (ai[am] != 0) {
1861     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1862     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1863     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1864     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1865   } else {
1866      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1867   }
1868 #endif
1869 
1870   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1871   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1872   ierr = PetscFree(jl);CHKERRQ(ierr);
1873 
1874   /* destroy list of free space and other temporary array(s) */
1875   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1876   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1877   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1878 
1879   /* put together the new matrix in MATSEQSBAIJ format */
1880 
1881   b = (Mat_SeqSBAIJ*)(fact)->data;
1882   b->singlemalloc = PETSC_FALSE;
1883   b->free_a       = PETSC_TRUE;
1884   b->free_ij      = PETSC_TRUE;
1885   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1886   b->j    = uj;
1887   b->i    = ui;
1888   b->diag = 0;
1889   b->ilen = 0;
1890   b->imax = 0;
1891   b->row  = perm;
1892   b->col  = perm;
1893   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1894   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1895   b->icol = iperm;
1896   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1897   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1898   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1899   b->maxnz = b->nz = ui[am];
1900 
1901   (fact)->info.factor_mallocs    = reallocs;
1902   (fact)->info.fill_ratio_given  = fill;
1903   if (ai[am] != 0) {
1904     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1905   } else {
1906     (fact)->info.fill_ratio_needed = 0.0;
1907   }
1908   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1909   PetscFunctionReturn(0);
1910 }
1911