xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(1,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data,*b;
58   IS           iscolf,isicol,isirow;
59   PetscTruth   reorder;
60   PetscErrorCode ierr;
61   int          *c,*r,*ic,i,n = A->m,sierr;
62   int          *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   int          *ordcol,*iwk,*iperm,*jw;
64   int          jmax,lfill,job,*o_i,*o_j;
65   PetscScalar  *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal    permtol,af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (int)(info->dtcount/2.0);
75   jmax    = (int)(info->fill*a->nz);
76   permtol = info->dtcol;
77 
78 
79   /* ------------------------------------------------------------
80      If reorder=.TRUE., then the original matrix has to be
81      reordered to reflect the user selected ordering scheme, and
82      then de-reordered so it is in it's original format.
83      Because Saad's dperm() is NOT in place, we have to copy
84      the original matrix and allocate more storage. . .
85      ------------------------------------------------------------
86   */
87 
88   /* set reorder to true if either isrow or iscol is not identity */
89   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
90   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
91   reorder = PetscNot(reorder);
92 
93 
94   /* storage for ilu factor */
95   ierr = PetscMalloc((n+1)*sizeof(int),&new_i);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(int),&new_j);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
98   ierr = PetscMalloc(n*sizeof(int),&ordcol);CHKERRQ(ierr);
99 
100   /* ------------------------------------------------------------
101      Make sure that everything is Fortran formatted (1-Based)
102      ------------------------------------------------------------
103   */
104   for (i=old_i[0];i<old_i[n];i++) {
105     old_j[i]++;
106   }
107   for(i=0;i<n+1;i++) {
108     old_i[i]++;
109   };
110 
111 
112   if (reorder) {
113     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
114     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
115     for(i=0;i<n;i++) {
116       r[i]  = r[i]+1;
117       c[i]  = c[i]+1;
118     }
119     ierr = PetscMalloc((n+1)*sizeof(int),&old_i2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
122     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
123     for (i=0;i<n;i++) {
124       r[i]  = r[i]-1;
125       c[i]  = c[i]-1;
126     }
127     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
128     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
129     o_a = old_a2;
130     o_j = old_j2;
131     o_i = old_i2;
132   } else {
133     o_a = old_a;
134     o_j = old_j;
135     o_i = old_i;
136   }
137 
138   /* ------------------------------------------------------------
139      Call Saad's ilutp() routine to generate the factorization
140      ------------------------------------------------------------
141   */
142 
143   ierr = PetscMalloc(2*n*sizeof(int),&iperm);CHKERRQ(ierr);
144   ierr = PetscMalloc(2*n*sizeof(int),&jw);CHKERRQ(ierr);
145   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
146 
147   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148   if (sierr) {
149     switch (sierr) {
150       case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
151       case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
152       case -5: SETERRQ(1,"ilutp(), zero row encountered");
153       case -1: SETERRQ(1,"ilutp(), input matrix may be wrong");
154       case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax);
155       default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",sierr);
156     }
157   }
158 
159   ierr = PetscFree(w);CHKERRQ(ierr);
160   ierr = PetscFree(jw);CHKERRQ(ierr);
161 
162   /* ------------------------------------------------------------
163      Saad's routine gives the result in Modified Sparse Row (msr)
164      Convert to Compressed Sparse Row format (csr)
165      ------------------------------------------------------------
166   */
167 
168   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169   ierr = PetscMalloc((n+1)*sizeof(int),&iwk);CHKERRQ(ierr);
170 
171   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172 
173   ierr = PetscFree(iwk);CHKERRQ(ierr);
174   ierr = PetscFree(wk);CHKERRQ(ierr);
175 
176   if (reorder) {
177     ierr = PetscFree(old_a2);CHKERRQ(ierr);
178     ierr = PetscFree(old_j2);CHKERRQ(ierr);
179     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180   } else {
181     /* fix permutation of old_j that the factorization introduced */
182     for (i=old_i[0]; i<old_i[n]; i++) {
183       old_j[i-1] = iperm[old_j[i-1]-1];
184     }
185   }
186 
187   /* get rid of the shift to indices starting at 1 */
188   for (i=0; i<n+1; i++) {
189     old_i[i]--;
190   }
191   for (i=old_i[0];i<old_i[n];i++) {
192     old_j[i]--;
193   }
194 
195   /* Make the factored matrix 0-based */
196   for (i=0; i<n+1; i++) {
197     new_i[i]--;
198   }
199   for (i=new_i[0];i<new_i[n];i++) {
200     new_j[i]--;
201   }
202 
203   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204   /*-- permute the right-hand-side and solution vectors           --*/
205   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
206   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
208   for(i=0; i<n; i++) {
209     ordcol[i] = ic[iperm[i]-1];
210   };
211   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
212   ierr = ISDestroy(isicol);CHKERRQ(ierr);
213 
214   ierr = PetscFree(iperm);CHKERRQ(ierr);
215 
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
217   ierr = PetscFree(ordcol);CHKERRQ(ierr);
218 
219   /*----- put together the new matrix -----*/
220 
221   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
222   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
223   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
224   (*fact)->factor    = FACTOR_LU;
225   (*fact)->assembled = PETSC_TRUE;
226 
227   b = (Mat_SeqAIJ*)(*fact)->data;
228   ierr = PetscFree(b->imax);CHKERRQ(ierr);
229   b->sorted        = PETSC_FALSE;
230   b->singlemalloc  = PETSC_FALSE;
231   /* the next line frees the default space generated by the MatCreate() */
232   ierr             = PetscFree(b->a);CHKERRQ(ierr);
233   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
234   b->a             = new_a;
235   b->j             = new_j;
236   b->i             = new_i;
237   b->ilen          = 0;
238   b->imax          = 0;
239   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
240   b->row           = isirow;
241   b->col           = iscolf;
242   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
243   b->maxnz = b->nz = new_i[n];
244   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
245   (*fact)->info.factor_mallocs = 0;
246 
247   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
248 
249   /* check out for identical nodes. If found, use inode functions */
250   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
251 
252   af = ((double)b->nz)/((double)a->nz) + .001;
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
256   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
257 
258   PetscFunctionReturn(0);
259 #endif
260 }
261 
262 /*
263     Factorization code for AIJ format.
264 */
265 #undef __FUNCT__
266 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
267 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
268 {
269   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
270   IS         isicol;
271   PetscErrorCode ierr;
272   int        *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
273   int        *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
274   int        *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
275   PetscReal  f;
276 
277   PetscFunctionBegin;
278   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
280   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
281   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
282 
283   /* get new row pointers */
284   ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr);
285   ainew[0] = 0;
286   /* don't know how many column pointers are needed so estimate */
287   f = info->fill;
288   jmax  = (int)(f*ai[n]+1);
289   ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr);
290   /* fill is a linked list of nonzeros in active row */
291   ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr);
292   im   = fill + n + 1;
293   /* idnew is location of diagonal in factor */
294   ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr);
295   idnew[0] = 0;
296 
297   for (i=0; i<n; i++) {
298     /* first copy previous fill into linked list */
299     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
300     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
301     ajtmp   = aj + ai[r[i]];
302     fill[n] = n;
303     while (nz--) {
304       fm  = n;
305       idx = ic[*ajtmp++];
306       do {
307         m  = fm;
308         fm = fill[m];
309       } while (fm < idx);
310       fill[m]   = idx;
311       fill[idx] = fm;
312     }
313     row = fill[n];
314     while (row < i) {
315       ajtmp = ajnew + idnew[row] + 1;
316       nzbd  = 1 + idnew[row] - ainew[row];
317       nz    = im[row] - nzbd;
318       fm    = row;
319       while (nz-- > 0) {
320         idx = *ajtmp++ ;
321         nzbd++;
322         if (idx == i) im[row] = nzbd;
323         do {
324           m  = fm;
325           fm = fill[m];
326         } while (fm < idx);
327         if (fm != idx) {
328           fill[m]   = idx;
329           fill[idx] = fm;
330           fm        = idx;
331           nnz++;
332         }
333       }
334       row = fill[row];
335     }
336     /* copy new filled row into permanent storage */
337     ainew[i+1] = ainew[i] + nnz;
338     if (ainew[i+1] > jmax) {
339 
340       /* estimate how much additional space we will need */
341       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
342       /* just double the memory each time */
343       int maxadd = jmax;
344       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
345       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
346       jmax += maxadd;
347 
348       /* allocate a longer ajnew */
349       ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr);
350       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(int));CHKERRQ(ierr);
351       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
352       ajnew = ajtmp;
353       realloc++; /* count how many times we realloc */
354     }
355     ajtmp = ajnew + ainew[i];
356     fm    = fill[n];
357     nzi   = 0;
358     im[i] = nnz;
359     while (nnz--) {
360       if (fm < i) nzi++;
361       *ajtmp++ = fm ;
362       fm       = fill[fm];
363     }
364     idnew[i] = ainew[i] + nzi;
365   }
366   if (ai[n] != 0) {
367     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
370     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
371     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
372   } else {
373     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
374   }
375 
376   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
377   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
378 
379   ierr = PetscFree(fill);CHKERRQ(ierr);
380 
381   /* put together the new matrix */
382   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
383   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
384   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
385   PetscLogObjectParent(*B,isicol);
386   b = (Mat_SeqAIJ*)(*B)->data;
387   ierr = PetscFree(b->imax);CHKERRQ(ierr);
388   b->singlemalloc = PETSC_FALSE;
389   /* the next line frees the default space generated by the Create() */
390   ierr = PetscFree(b->a);CHKERRQ(ierr);
391   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
392   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
393   b->j          = ajnew;
394   b->i          = ainew;
395   b->diag       = idnew;
396   b->ilen       = 0;
397   b->imax       = 0;
398   b->row        = isrow;
399   b->col        = iscol;
400   b->lu_damping        = info->damping;
401   b->lu_zeropivot      = info->zeropivot;
402   b->lu_shift          = info->shift;
403   b->lu_shift_fraction = info->shift_fraction;
404   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
405   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
406   b->icol       = isicol;
407   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
408   /* In b structure:  Free imax, ilen, old a, old j.
409      Allocate idnew, solve_work, new a, new j */
410   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(PetscScalar)));
411   b->maxnz = b->nz = ainew[n] ;
412 
413   (*B)->factor                 =  FACTOR_LU;
414   (*B)->info.factor_mallocs    = realloc;
415   (*B)->info.fill_ratio_given  = f;
416   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
417   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
418 
419   if (ai[n] != 0) {
420     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
421   } else {
422     (*B)->info.fill_ratio_needed = 0.0;
423   }
424   PetscFunctionReturn(0);
425 }
426 /* ----------------------------------------------------------- */
427 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
431 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
432 {
433   Mat          C = *B;
434   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
435   IS           isrow = b->row,isicol = b->icol;
436   PetscErrorCode ierr;
437   int          *r,*ic,i,j,n = A->m,*ai = b->i,*aj = b->j;
438   int          *ajtmpold,*ajtmp,nz,row,*ics;
439   int          *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0;
440   PetscScalar  *rtmp,*v,*pc,multiplier,*pv,*rtmps;
441   PetscReal    damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d;
442   PetscReal    row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.;
443   PetscTruth   damp,lushift;
444 
445   PetscFunctionBegin;
446   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
447   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
448   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
449   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
450   rtmps = rtmp; ics = ic;
451 
452   if (!a->diag) {
453     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
454   }
455 
456   if (b->lu_shift) { /* set max shift */
457     int *aai = a->i,*ddiag = a->diag;
458     shift_top = 0;
459     for (i=0; i<n; i++) {
460       d = PetscAbsScalar((a->a)[ddiag[i]]);
461       /* calculate amt of shift needed for this row */
462       if (d<=0) {
463         row_shift = 0;
464       } else {
465         row_shift = -2*d;
466       }
467       v = a->a+aai[i];
468       for (j=0; j<aai[i+1]-aai[i]; j++)
469 	row_shift += PetscAbsScalar(v[j]);
470       if (row_shift>shift_top) shift_top = row_shift;
471     }
472   }
473 
474   shift_fraction = 0; shift_amount = 0;
475   do {
476     damp    = PETSC_FALSE;
477     lushift = PETSC_FALSE;
478     for (i=0; i<n; i++) {
479       nz    = ai[i+1] - ai[i];
480       ajtmp = aj + ai[i];
481       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
482 
483       /* load in initial (unfactored row) */
484       nz       = a->i[r[i]+1] - a->i[r[i]];
485       ajtmpold = a->j + a->i[r[i]];
486       v        = a->a + a->i[r[i]];
487       for (j=0; j<nz; j++) {
488         rtmp[ics[ajtmpold[j]]] = v[j];
489       }
490       rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */
491 
492       row = *ajtmp++ ;
493       while  (row < i) {
494         pc = rtmp + row;
495         if (*pc != 0.0) {
496           pv         = b->a + diag_offset[row];
497           pj         = b->j + diag_offset[row] + 1;
498           multiplier = *pc / *pv++;
499           *pc        = multiplier;
500           nz         = ai[row+1] - diag_offset[row] - 1;
501           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
502           PetscLogFlops(2*nz);
503         }
504         row = *ajtmp++;
505       }
506       /* finished row so stick it into b->a */
507       pv   = b->a + ai[i] ;
508       pj   = b->j + ai[i] ;
509       nz   = ai[i+1] - ai[i];
510       diag = diag_offset[i] - ai[i];
511       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
512       rs   = 0.0;
513       for (j=0; j<nz; j++) {
514         pv[j] = rtmps[pj[j]];
515         if (j != diag) rs += PetscAbsScalar(pv[j]);
516       }
517 #define MAX_NSHIFT 5
518       if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) {
519 	if (nshift>MAX_NSHIFT) {
520 	  SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner");
521 	} else if (nshift==MAX_NSHIFT) {
522 	  shift_fraction = shift_hi;
523 	  lushift      = PETSC_FALSE;
524 	} else {
525 	  shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
526 	  lushift      = PETSC_TRUE;
527 	}
528 	shift_amount = shift_fraction * shift_top;
529         nshift++;
530         break;
531       }
532       if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) {
533         if (damping) {
534           if (ndamp) damping *= 2.0;
535           damp = PETSC_TRUE;
536           ndamp++;
537           break;
538         } else {
539           SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
540         }
541       }
542     }
543     if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) {
544       /*
545        * if no shift in this attempt & shifting & started shifting & can refine,
546        * then try lower shift
547        */
548       shift_hi       = shift_fraction;
549       shift_fraction = (shift_hi+shift_lo)/2.;
550       shift_amount   = shift_fraction * shift_top;
551       lushift        = PETSC_TRUE;
552       nshift++;
553     }
554   } while (damp || lushift);
555 
556   /* invert diagonal entries for simplier triangular solves */
557   for (i=0; i<n; i++) {
558     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
559   }
560 
561   ierr = PetscFree(rtmp);CHKERRQ(ierr);
562   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
563   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
564   C->factor = FACTOR_LU;
565   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
566   C->assembled = PETSC_TRUE;
567   PetscLogFlops(C->n);
568   if (ndamp) {
569     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping);
570   }
571   if (nshift) {
572     b->lu_shift_fraction = shift_fraction;
573     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %d\n",shift_fraction,shift_top,nshift);
574   }
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
580 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
581 {
582   PetscFunctionBegin;
583   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
584   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
585   PetscFunctionReturn(0);
586 }
587 
588 
589 /* ----------------------------------------------------------- */
590 #undef __FUNCT__
591 #define __FUNCT__ "MatLUFactor_SeqAIJ"
592 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
593 {
594   PetscErrorCode ierr;
595   Mat C;
596 
597   PetscFunctionBegin;
598   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
599   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
600   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
601   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
602   PetscFunctionReturn(0);
603 }
604 /* ----------------------------------------------------------- */
605 #undef __FUNCT__
606 #define __FUNCT__ "MatSolve_SeqAIJ"
607 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
608 {
609   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
610   IS           iscol = a->col,isrow = a->row;
611   PetscErrorCode ierr;
612   int          *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
613   int          nz,*rout,*cout;
614   PetscScalar  *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
615 
616   PetscFunctionBegin;
617   if (!n) PetscFunctionReturn(0);
618 
619   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
620   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
621   tmp  = a->solve_work;
622 
623   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
624   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
625 
626   /* forward solve the lower triangular */
627   tmp[0] = b[*r++];
628   tmps   = tmp;
629   for (i=1; i<n; i++) {
630     v   = aa + ai[i] ;
631     vi  = aj + ai[i] ;
632     nz  = a->diag[i] - ai[i];
633     sum = b[*r++];
634     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
635     tmp[i] = sum;
636   }
637 
638   /* backward solve the upper triangular */
639   for (i=n-1; i>=0; i--){
640     v   = aa + a->diag[i] + 1;
641     vi  = aj + a->diag[i] + 1;
642     nz  = ai[i+1] - a->diag[i] - 1;
643     sum = tmp[i];
644     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
645     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
646   }
647 
648   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
649   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
650   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
651   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
652   PetscLogFlops(2*a->nz - A->n);
653   PetscFunctionReturn(0);
654 }
655 
656 /* ----------------------------------------------------------- */
657 #undef __FUNCT__
658 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
659 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
660 {
661   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
662   PetscErrorCode ierr;
663   int          n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
664   PetscScalar  *x,*b,*aa = a->a;
665 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
666   int          adiag_i,i,*vi,nz,ai_i;
667   PetscScalar  *v,sum;
668 #endif
669 
670   PetscFunctionBegin;
671   if (!n) PetscFunctionReturn(0);
672 
673   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
674   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
675 
676 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
677   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
678 #else
679   /* forward solve the lower triangular */
680   x[0] = b[0];
681   for (i=1; i<n; i++) {
682     ai_i = ai[i];
683     v    = aa + ai_i;
684     vi   = aj + ai_i;
685     nz   = adiag[i] - ai_i;
686     sum  = b[i];
687     while (nz--) sum -= *v++ * x[*vi++];
688     x[i] = sum;
689   }
690 
691   /* backward solve the upper triangular */
692   for (i=n-1; i>=0; i--){
693     adiag_i = adiag[i];
694     v       = aa + adiag_i + 1;
695     vi      = aj + adiag_i + 1;
696     nz      = ai[i+1] - adiag_i - 1;
697     sum     = x[i];
698     while (nz--) sum -= *v++ * x[*vi++];
699     x[i]    = sum*aa[adiag_i];
700   }
701 #endif
702   PetscLogFlops(2*a->nz - A->n);
703   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
704   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
710 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
711 {
712   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
713   IS           iscol = a->col,isrow = a->row;
714   PetscErrorCode ierr;
715   int          *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
716   int          nz,*rout,*cout;
717   PetscScalar  *x,*b,*tmp,*aa = a->a,sum,*v;
718 
719   PetscFunctionBegin;
720   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
721 
722   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
723   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
724   tmp  = a->solve_work;
725 
726   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
727   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
728 
729   /* forward solve the lower triangular */
730   tmp[0] = b[*r++];
731   for (i=1; i<n; i++) {
732     v   = aa + ai[i] ;
733     vi  = aj + ai[i] ;
734     nz  = a->diag[i] - ai[i];
735     sum = b[*r++];
736     while (nz--) sum -= *v++ * tmp[*vi++ ];
737     tmp[i] = sum;
738   }
739 
740   /* backward solve the upper triangular */
741   for (i=n-1; i>=0; i--){
742     v   = aa + a->diag[i] + 1;
743     vi  = aj + a->diag[i] + 1;
744     nz  = ai[i+1] - a->diag[i] - 1;
745     sum = tmp[i];
746     while (nz--) sum -= *v++ * tmp[*vi++ ];
747     tmp[i] = sum*aa[a->diag[i]];
748     x[*c--] += tmp[i];
749   }
750 
751   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
752   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
753   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
754   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
755   PetscLogFlops(2*a->nz);
756 
757   PetscFunctionReturn(0);
758 }
759 /* -------------------------------------------------------------------*/
760 #undef __FUNCT__
761 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
762 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
763 {
764   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
765   IS           iscol = a->col,isrow = a->row;
766   PetscErrorCode ierr;
767   int          *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
768   int          nz,*rout,*cout,*diag = a->diag;
769   PetscScalar  *x,*b,*tmp,*aa = a->a,*v,s1;
770 
771   PetscFunctionBegin;
772   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
773   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
774   tmp  = a->solve_work;
775 
776   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
777   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
778 
779   /* copy the b into temp work space according to permutation */
780   for (i=0; i<n; i++) tmp[i] = b[c[i]];
781 
782   /* forward solve the U^T */
783   for (i=0; i<n; i++) {
784     v   = aa + diag[i] ;
785     vi  = aj + diag[i] + 1;
786     nz  = ai[i+1] - diag[i] - 1;
787     s1  = tmp[i];
788     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
789     while (nz--) {
790       tmp[*vi++ ] -= (*v++)*s1;
791     }
792     tmp[i] = s1;
793   }
794 
795   /* backward solve the L^T */
796   for (i=n-1; i>=0; i--){
797     v   = aa + diag[i] - 1 ;
798     vi  = aj + diag[i] - 1 ;
799     nz  = diag[i] - ai[i];
800     s1  = tmp[i];
801     while (nz--) {
802       tmp[*vi-- ] -= (*v--)*s1;
803     }
804   }
805 
806   /* copy tmp into x according to permutation */
807   for (i=0; i<n; i++) x[r[i]] = tmp[i];
808 
809   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
810   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
811   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
812   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
813 
814   PetscLogFlops(2*a->nz-A->n);
815   PetscFunctionReturn(0);
816 }
817 
818 #undef __FUNCT__
819 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
820 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
821 {
822   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
823   IS           iscol = a->col,isrow = a->row;
824   PetscErrorCode ierr;
825   int          *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
826   int          nz,*rout,*cout,*diag = a->diag;
827   PetscScalar  *x,*b,*tmp,*aa = a->a,*v;
828 
829   PetscFunctionBegin;
830   if (zz != xx) VecCopy(zz,xx);
831 
832   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
833   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
834   tmp = a->solve_work;
835 
836   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
837   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
838 
839   /* copy the b into temp work space according to permutation */
840   for (i=0; i<n; i++) tmp[i] = b[c[i]];
841 
842   /* forward solve the U^T */
843   for (i=0; i<n; i++) {
844     v   = aa + diag[i] ;
845     vi  = aj + diag[i] + 1;
846     nz  = ai[i+1] - diag[i] - 1;
847     tmp[i] *= *v++;
848     while (nz--) {
849       tmp[*vi++ ] -= (*v++)*tmp[i];
850     }
851   }
852 
853   /* backward solve the L^T */
854   for (i=n-1; i>=0; i--){
855     v   = aa + diag[i] - 1 ;
856     vi  = aj + diag[i] - 1 ;
857     nz  = diag[i] - ai[i];
858     while (nz--) {
859       tmp[*vi-- ] -= (*v--)*tmp[i];
860     }
861   }
862 
863   /* copy tmp into x according to permutation */
864   for (i=0; i<n; i++) x[r[i]] += tmp[i];
865 
866   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
867   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
868   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
869   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
870 
871   PetscLogFlops(2*a->nz);
872   PetscFunctionReturn(0);
873 }
874 /* ----------------------------------------------------------------*/
875 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
876 
877 #undef __FUNCT__
878 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
879 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
880 {
881   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
882   IS         isicol;
883   PetscErrorCode ierr;
884   int        *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
885   int        *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
886   int        *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0;
887   int        incrlev,nnz,i,levels,diagonal_fill;
888   PetscTruth col_identity,row_identity;
889   PetscReal  f;
890 
891   PetscFunctionBegin;
892   f             = info->fill;
893   levels        = (int)info->levels;
894   diagonal_fill = (int)info->diagonal_fill;
895   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
896 
897   /* special case that simply copies fill pattern */
898   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
899   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
900   if (!levels && row_identity && col_identity) {
901     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
902     (*fact)->factor = FACTOR_LU;
903     b               = (Mat_SeqAIJ*)(*fact)->data;
904     if (!b->diag) {
905       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
906     }
907     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
908     b->row              = isrow;
909     b->col              = iscol;
910     b->icol             = isicol;
911     b->lu_damping       = info->damping;
912     b->lu_zeropivot     = info->zeropivot;
913     b->lu_shift         = info->shift;
914     b->lu_shift_fraction= info->shift_fraction;
915     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
916     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
917     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
918     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
919     PetscFunctionReturn(0);
920   }
921 
922   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
923   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
924 
925   /* get new row pointers */
926   ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr);
927   ainew[0] = 0;
928   /* don't know how many column pointers are needed so estimate */
929   jmax = (int)(f*(ai[n]+1));
930   ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr);
931   /* ajfill is level of fill for each fill entry */
932   ierr = PetscMalloc((jmax)*sizeof(int),&ajfill);CHKERRQ(ierr);
933   /* fill is a linked list of nonzeros in active row */
934   ierr = PetscMalloc((n+1)*sizeof(int),&fill);CHKERRQ(ierr);
935   /* im is level for each filled value */
936   ierr = PetscMalloc((n+1)*sizeof(int),&im);CHKERRQ(ierr);
937   /* dloc is location of diagonal in factor */
938   ierr = PetscMalloc((n+1)*sizeof(int),&dloc);CHKERRQ(ierr);
939   dloc[0]  = 0;
940   for (prow=0; prow<n; prow++) {
941 
942     /* copy current row into linked list */
943     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
944     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
945     xi      = aj + ai[r[prow]] ;
946     fill[n]    = n;
947     fill[prow] = -1; /* marker to indicate if diagonal exists */
948     while (nz--) {
949       fm  = n;
950       idx = ic[*xi++ ];
951       do {
952         m  = fm;
953         fm = fill[m];
954       } while (fm < idx);
955       fill[m]   = idx;
956       fill[idx] = fm;
957       im[idx]   = 0;
958     }
959 
960     /* make sure diagonal entry is included */
961     if (diagonal_fill && fill[prow] == -1) {
962       fm = n;
963       while (fill[fm] < prow) fm = fill[fm];
964       fill[prow] = fill[fm]; /* insert diagonal into linked list */
965       fill[fm]   = prow;
966       im[prow]   = 0;
967       nzf++;
968       dcount++;
969     }
970 
971     nzi = 0;
972     row = fill[n];
973     while (row < prow) {
974       incrlev = im[row] + 1;
975       nz      = dloc[row];
976       xi      = ajnew  + ainew[row]  + nz + 1;
977       flev    = ajfill + ainew[row]  + nz + 1;
978       nnz     = ainew[row+1] - ainew[row] - nz - 1;
979       fm      = row;
980       while (nnz-- > 0) {
981         idx = *xi++ ;
982         if (*flev + incrlev > levels) {
983           flev++;
984           continue;
985         }
986         do {
987           m  = fm;
988           fm = fill[m];
989         } while (fm < idx);
990         if (fm != idx) {
991           im[idx]   = *flev + incrlev;
992           fill[m]   = idx;
993           fill[idx] = fm;
994           fm        = idx;
995           nzf++;
996         } else {
997           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
998         }
999         flev++;
1000       }
1001       row = fill[row];
1002       nzi++;
1003     }
1004     /* copy new filled row into permanent storage */
1005     ainew[prow+1] = ainew[prow] + nzf;
1006     if (ainew[prow+1] > jmax) {
1007 
1008       /* estimate how much additional space we will need */
1009       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1010       /* just double the memory each time */
1011       /*  maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1012       int maxadd = jmax;
1013       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1014       jmax += maxadd;
1015 
1016       /* allocate a longer ajnew and ajfill */
1017       ierr   = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr);
1018       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(int));CHKERRQ(ierr);
1019       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1020       ajnew  = xi;
1021       ierr   = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr);
1022       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(int));CHKERRQ(ierr);
1023       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1024       ajfill = xi;
1025       realloc++; /* count how many times we realloc */
1026     }
1027     xi          = ajnew + ainew[prow] ;
1028     flev        = ajfill + ainew[prow] ;
1029     dloc[prow]  = nzi;
1030     fm          = fill[n];
1031     while (nzf--) {
1032       *xi++   = fm ;
1033       *flev++ = im[fm];
1034       fm      = fill[fm];
1035     }
1036     /* make sure row has diagonal entry */
1037     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1038       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\
1039     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1040     }
1041   }
1042   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1043   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1044   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1045   ierr = PetscFree(fill);CHKERRQ(ierr);
1046   ierr = PetscFree(im);CHKERRQ(ierr);
1047 
1048   {
1049     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1050     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1051     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1052     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1053     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1054     if (diagonal_fill) {
1055       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount);
1056     }
1057   }
1058 
1059   /* put together the new matrix */
1060   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1061   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1062   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1063   PetscLogObjectParent(*fact,isicol);
1064   b = (Mat_SeqAIJ*)(*fact)->data;
1065   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1066   b->singlemalloc = PETSC_FALSE;
1067   len = (ainew[n] )*sizeof(PetscScalar);
1068   /* the next line frees the default space generated by the Create() */
1069   ierr = PetscFree(b->a);CHKERRQ(ierr);
1070   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1071   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1072   b->j          = ajnew;
1073   b->i          = ainew;
1074   for (i=0; i<n; i++) dloc[i] += ainew[i];
1075   b->diag       = dloc;
1076   b->ilen       = 0;
1077   b->imax       = 0;
1078   b->row        = isrow;
1079   b->col        = iscol;
1080   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1081   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1082   b->icol       = isicol;
1083   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1084   /* In b structure:  Free imax, ilen, old a, old j.
1085      Allocate dloc, solve_work, new a, new j */
1086   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(int)+sizeof(PetscScalar)));
1087   b->maxnz             = b->nz = ainew[n] ;
1088   b->lu_damping        = info->damping;
1089   b->lu_shift          = info->shift;
1090   b->lu_shift_fraction = info->shift_fraction;
1091   b->lu_zeropivot = info->zeropivot;
1092   (*fact)->factor = FACTOR_LU;
1093   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1094   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1095 
1096   (*fact)->info.factor_mallocs    = realloc;
1097   (*fact)->info.fill_ratio_given  = f;
1098   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #include "src/mat/impls/sbaij/seq/sbaij.h"
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1105 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact)
1106 {
1107   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   if (!a->sbaijMat){
1112     ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
1113   }
1114 
1115   ierr = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);CHKERRQ(ierr);
1116   ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);
1117   a->sbaijMat = PETSC_NULL;
1118 
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1124 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1125 {
1126   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
1127   PetscErrorCode ierr;
1128   PetscTruth          perm_identity;
1129 
1130   PetscFunctionBegin;
1131   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1132   if (!perm_identity){
1133     SETERRQ(1,"Non-identity permutation is not supported yet");
1134   }
1135   if (!a->sbaijMat){
1136     ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
1137   }
1138 
1139   ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
1140   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1141 
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1147 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1148 {
1149   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscTruth          perm_identity;
1152 
1153   PetscFunctionBegin;
1154   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1155   if (!perm_identity){
1156     SETERRQ(1,"Non-identity permutation is not supported yet");
1157   }
1158   if (!a->sbaijMat){
1159     ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
1160   }
1161 
1162   ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
1163   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1164 
1165   PetscFunctionReturn(0);
1166 }
1167