xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 09f3b4e5628a00a1eaf17d80982cfbcc515cc9c1)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
23 
24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
28 #endif
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
32   /* ------------------------------------------------------------
33 
34           This interface was contribed by Tony Caola
35 
36      This routine is an interface to the pivoting drop-tolerance
37      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
38      SPARSEKIT2.
39 
40      The SPARSEKIT2 routines used here are covered by the GNU
41      copyright; see the file gnu in this directory.
42 
43      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
44      help in getting this routine ironed out.
45 
46      The major drawback to this routine is that if info->fill is
47      not large enough it fails rather than allocating more space;
48      this can be fixed by hacking/improving the f2c version of
49      Yousef Saad's code.
50 
51      ------------------------------------------------------------
52 */
53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
54 {
55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
56   PetscFunctionBegin;
57   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
58   You can obtain the drop tolerance routines by installing PETSc from\n\
59   www.mcs.anl.gov/petsc\n");
60 #else
61   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
62   IS             iscolf,isicol,isirow;
63   PetscTruth     reorder;
64   PetscErrorCode ierr,sierr;
65   PetscInt       *c,*r,*ic,i,n = A->m;
66   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
67   PetscInt       *ordcol,*iwk,*iperm,*jw;
68   PetscInt       jmax,lfill,job,*o_i,*o_j;
69   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
70   PetscReal      af;
71 
72   PetscFunctionBegin;
73 
74   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
75   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
76   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
77   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
78   lfill   = (PetscInt)(info->dtcount/2.0);
79   jmax    = (PetscInt)(info->fill*a->nz);
80 
81 
82   /* ------------------------------------------------------------
83      If reorder=.TRUE., then the original matrix has to be
84      reordered to reflect the user selected ordering scheme, and
85      then de-reordered so it is in it's original format.
86      Because Saad's dperm() is NOT in place, we have to copy
87      the original matrix and allocate more storage. . .
88      ------------------------------------------------------------
89   */
90 
91   /* set reorder to true if either isrow or iscol is not identity */
92   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
93   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
94   reorder = PetscNot(reorder);
95 
96 
97   /* storage for ilu factor */
98   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
100   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
101   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
102 
103   /* ------------------------------------------------------------
104      Make sure that everything is Fortran formatted (1-Based)
105      ------------------------------------------------------------
106   */
107   for (i=old_i[0];i<old_i[n];i++) {
108     old_j[i]++;
109   }
110   for(i=0;i<n+1;i++) {
111     old_i[i]++;
112   };
113 
114 
115   if (reorder) {
116     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
117     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       r[i]  = r[i]+1;
120       c[i]  = c[i]+1;
121     }
122     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
123     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
124     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
125     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
126     for (i=0;i<n;i++) {
127       r[i]  = r[i]-1;
128       c[i]  = c[i]-1;
129     }
130     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
131     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
132     o_a = old_a2;
133     o_j = old_j2;
134     o_i = old_i2;
135   } else {
136     o_a = old_a;
137     o_j = old_j;
138     o_i = old_i;
139   }
140 
141   /* ------------------------------------------------------------
142      Call Saad's ilutp() routine to generate the factorization
143      ------------------------------------------------------------
144   */
145 
146   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
147   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
148   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
149 
150   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
151   if (sierr) {
152     switch (sierr) {
153       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
154       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
155       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
156       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
157       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
158       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
159     }
160   }
161 
162   ierr = PetscFree(w);CHKERRQ(ierr);
163   ierr = PetscFree(jw);CHKERRQ(ierr);
164 
165   /* ------------------------------------------------------------
166      Saad's routine gives the result in Modified Sparse Row (msr)
167      Convert to Compressed Sparse Row format (csr)
168      ------------------------------------------------------------
169   */
170 
171   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
172   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
173 
174   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
175 
176   ierr = PetscFree(iwk);CHKERRQ(ierr);
177   ierr = PetscFree(wk);CHKERRQ(ierr);
178 
179   if (reorder) {
180     ierr = PetscFree(old_a2);CHKERRQ(ierr);
181     ierr = PetscFree(old_j2);CHKERRQ(ierr);
182     ierr = PetscFree(old_i2);CHKERRQ(ierr);
183   } else {
184     /* fix permutation of old_j that the factorization introduced */
185     for (i=old_i[0]; i<old_i[n]; i++) {
186       old_j[i-1] = iperm[old_j[i-1]-1];
187     }
188   }
189 
190   /* get rid of the shift to indices starting at 1 */
191   for (i=0; i<n+1; i++) {
192     old_i[i]--;
193   }
194   for (i=old_i[0];i<old_i[n];i++) {
195     old_j[i]--;
196   }
197 
198   /* Make the factored matrix 0-based */
199   for (i=0; i<n+1; i++) {
200     new_i[i]--;
201   }
202   for (i=new_i[0];i<new_i[n];i++) {
203     new_j[i]--;
204   }
205 
206   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
207   /*-- permute the right-hand-side and solution vectors           --*/
208   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
209   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
210   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
211   for(i=0; i<n; i++) {
212     ordcol[i] = ic[iperm[i]-1];
213   };
214   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
215   ierr = ISDestroy(isicol);CHKERRQ(ierr);
216 
217   ierr = PetscFree(iperm);CHKERRQ(ierr);
218 
219   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
220   ierr = PetscFree(ordcol);CHKERRQ(ierr);
221 
222   /*----- put together the new matrix -----*/
223 
224   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
225   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
226   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
227   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
228   (*fact)->factor    = FACTOR_LU;
229   (*fact)->assembled = PETSC_TRUE;
230 
231   b = (Mat_SeqAIJ*)(*fact)->data;
232   b->freedata      = PETSC_TRUE;
233   b->sorted        = PETSC_FALSE;
234   b->singlemalloc  = PETSC_FALSE;
235   b->a             = new_a;
236   b->j             = new_j;
237   b->i             = new_i;
238   b->ilen          = 0;
239   b->imax          = 0;
240   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241   b->row           = isirow;
242   b->col           = iscolf;
243   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
244   b->maxnz = b->nz = new_i[n];
245   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
246   (*fact)->info.factor_mallocs = 0;
247 
248   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
249 
250   af = ((double)b->nz)/((double)a->nz) + .001;
251   ierr = PetscVerboseInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr);
252   ierr = PetscVerboseInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr);
253   ierr = PetscVerboseInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr);
254   ierr = PetscVerboseInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
255 
256   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
257 
258   PetscFunctionReturn(0);
259 #endif
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
265 {
266   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
267   IS                 isicol;
268   PetscErrorCode     ierr;
269   PetscInt           *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
270   PetscInt           *bi,*bj,*ajtmp;
271   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
272   PetscReal          f;
273   PetscInt           nlnk,*lnk,k,**bi_ptr;
274   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
275   PetscBT            lnkbt;
276 
277   PetscFunctionBegin;
278   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
280   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
281   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
282 
283   /* get new row pointers */
284   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
285   bi[0] = 0;
286 
287   /* bdiag is location of diagonal in factor */
288   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
289   bdiag[0] = 0;
290 
291   /* linked list for storing column indices of the active row */
292   nlnk = n + 1;
293   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
294 
295   ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr);
296   bi_ptr = (PetscInt**)(im + n);
297 
298   /* initial FreeSpace size is f*(ai[n]+1) */
299   f = info->fill;
300   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
301   current_space = free_space;
302 
303   for (i=0; i<n; i++) {
304     /* copy previous fill into linked list */
305     nzi = 0;
306     nnz = ai[r[i]+1] - ai[r[i]];
307     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
308     ajtmp = aj + ai[r[i]];
309     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
310     nzi += nlnk;
311 
312     /* add pivot rows into linked list */
313     row = lnk[n];
314     while (row < i) {
315       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
316       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
317       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
318       nzi += nlnk;
319       row  = lnk[row];
320     }
321     bi[i+1] = bi[i] + nzi;
322     im[i]   = nzi;
323 
324     /* mark bdiag */
325     nzbd = 0;
326     nnz  = nzi;
327     k    = lnk[n];
328     while (nnz-- && k < i){
329       nzbd++;
330       k = lnk[k];
331     }
332     bdiag[i] = bi[i] + nzbd;
333 
334     /* if free space is not available, make more free space */
335     if (current_space->local_remaining<nzi) {
336       nnz = (n - i)*nzi; /* estimated and max additional space needed */
337       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
338       reallocs++;
339     }
340 
341     /* copy data into free space, then initialize lnk */
342     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
343     bi_ptr[i] = current_space->array;
344     current_space->array           += nzi;
345     current_space->local_used      += nzi;
346     current_space->local_remaining -= nzi;
347   }
348 #if defined(PETSC_USE_VERBOSE)
349   if (ai[n] != 0) {
350     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
351     ierr = PetscVerboseInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
352     ierr = PetscVerboseInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));CHKERRQ(ierr);
353     ierr = PetscVerboseInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr);
354     ierr = PetscVerboseInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
355   } else {
356     ierr = PetscVerboseInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr);
357   }
358 #endif
359 
360   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
361   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
362 
363   /* destroy list of free space and other temporary array(s) */
364   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
365   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
366   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
367   ierr = PetscFree(im);CHKERRQ(ierr);
368 
369   /* put together the new matrix */
370   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
371   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
372   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
373   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
374   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
375   b    = (Mat_SeqAIJ*)(*B)->data;
376   b->freedata     = PETSC_TRUE;
377   b->singlemalloc = PETSC_FALSE;
378   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
379   b->j          = bj;
380   b->i          = bi;
381   b->diag       = bdiag;
382   b->ilen       = 0;
383   b->imax       = 0;
384   b->row        = isrow;
385   b->col        = iscol;
386   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
387   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
388   b->icol       = isicol;
389   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
390 
391   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
392   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
393   b->maxnz = b->nz = bi[n] ;
394 
395   (*B)->factor                 =  FACTOR_LU;
396   (*B)->info.factor_mallocs    = reallocs;
397   (*B)->info.fill_ratio_given  = f;
398 
399   if (ai[n] != 0) {
400     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
401   } else {
402     (*B)->info.fill_ratio_needed = 0.0;
403   }
404   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
405   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
406   PetscFunctionReturn(0);
407 }
408 
409 /* ----------------------------------------------------------- */
410 #undef __FUNCT__
411 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
412 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
413 {
414   Mat            C=*B;
415   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
416   IS             isrow = b->row,isicol = b->icol;
417   PetscErrorCode ierr;
418   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
419   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
420   PetscInt       *diag_offset = b->diag,diag,*pj;
421   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
422   PetscScalar    d;
423   PetscReal      rs;
424   LUShift_Ctx    sctx;
425   PetscInt       newshift;
426 
427   PetscFunctionBegin;
428   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
429   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
430   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
431   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
432   rtmps = rtmp; ics = ic;
433 
434   if (!a->diag) {
435     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
436   }
437   /* if both shift schemes are chosen by user, only use info->shiftpd */
438   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
439   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
440     PetscInt *aai = a->i,*ddiag = a->diag;
441     sctx.shift_top = 0;
442     for (i=0; i<n; i++) {
443       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
444       d  = (a->a)[ddiag[i]];
445       rs = -PetscAbsScalar(d) - PetscRealPart(d);
446       v  = a->a+aai[i];
447       nz = aai[i+1] - aai[i];
448       for (j=0; j<nz; j++)
449 	rs += PetscAbsScalar(v[j]);
450       if (rs>sctx.shift_top) sctx.shift_top = rs;
451     }
452     if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12;
453     sctx.shift_top    *= 1.1;
454     sctx.nshift_max   = 5;
455     sctx.shift_lo     = 0.;
456     sctx.shift_hi     = 1.;
457   }
458 
459   sctx.shift_amount = 0;
460   sctx.nshift       = 0;
461   do {
462     sctx.lushift = PETSC_FALSE;
463     for (i=0; i<n; i++){
464       nz    = bi[i+1] - bi[i];
465       bjtmp = bj + bi[i];
466       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
467 
468       /* load in initial (unfactored row) */
469       nz    = a->i[r[i]+1] - a->i[r[i]];
470       ajtmp = a->j + a->i[r[i]];
471       v     = a->a + a->i[r[i]];
472       for (j=0; j<nz; j++) {
473         rtmp[ics[ajtmp[j]]] = v[j];
474       }
475       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
476 
477       row = *bjtmp++;
478       while  (row < i) {
479         pc = rtmp + row;
480         if (*pc != 0.0) {
481           pv         = b->a + diag_offset[row];
482           pj         = b->j + diag_offset[row] + 1;
483           multiplier = *pc / *pv++;
484           *pc        = multiplier;
485           nz         = bi[row+1] - diag_offset[row] - 1;
486           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
487           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
488         }
489         row = *bjtmp++;
490       }
491       /* finished row so stick it into b->a */
492       pv   = b->a + bi[i] ;
493       pj   = b->j + bi[i] ;
494       nz   = bi[i+1] - bi[i];
495       diag = diag_offset[i] - bi[i];
496       rs   = 0.0;
497       for (j=0; j<nz; j++) {
498         pv[j] = rtmps[pj[j]];
499         if (j != diag) rs += PetscAbsScalar(pv[j]);
500       }
501 
502       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
503       sctx.rs  = rs;
504       sctx.pv  = pv[diag];
505       ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
506       if (newshift == 1){
507         break;    /* sctx.shift_amount is updated */
508       } else if (newshift == -1){
509         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
510       }
511     }
512 
513     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
514       /*
515        * if no shift in this attempt & shifting & started shifting & can refine,
516        * then try lower shift
517        */
518       sctx.shift_hi        = info->shift_fraction;
519       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
520       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
521       sctx.lushift         = PETSC_TRUE;
522       sctx.nshift++;
523     }
524   } while (sctx.lushift);
525 
526   /* invert diagonal entries for simplier triangular solves */
527   for (i=0; i<n; i++) {
528     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
529   }
530 
531   ierr = PetscFree(rtmp);CHKERRQ(ierr);
532   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
533   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
534   C->factor = FACTOR_LU;
535   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
536   C->assembled = PETSC_TRUE;
537   ierr = PetscLogFlops(C->n);CHKERRQ(ierr);
538   if (sctx.nshift){
539     if (info->shiftnz) {
540       ierr = PetscVerboseInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
541     } else if (info->shiftpd) {
542       ierr = PetscVerboseInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr);
543     }
544   }
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
550 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
551 {
552   PetscFunctionBegin;
553   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
554   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
555   PetscFunctionReturn(0);
556 }
557 
558 
559 /* ----------------------------------------------------------- */
560 #undef __FUNCT__
561 #define __FUNCT__ "MatLUFactor_SeqAIJ"
562 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
563 {
564   PetscErrorCode ierr;
565   Mat            C;
566 
567   PetscFunctionBegin;
568   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
569   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
570   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
571   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 /* ----------------------------------------------------------- */
575 #undef __FUNCT__
576 #define __FUNCT__ "MatSolve_SeqAIJ"
577 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
578 {
579   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
580   IS             iscol = a->col,isrow = a->row;
581   PetscErrorCode ierr;
582   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
583   PetscInt       nz,*rout,*cout;
584   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
585 
586   PetscFunctionBegin;
587   if (!n) PetscFunctionReturn(0);
588 
589   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
590   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
591   tmp  = a->solve_work;
592 
593   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
594   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
595 
596   /* forward solve the lower triangular */
597   tmp[0] = b[*r++];
598   tmps   = tmp;
599   for (i=1; i<n; i++) {
600     v   = aa + ai[i] ;
601     vi  = aj + ai[i] ;
602     nz  = a->diag[i] - ai[i];
603     sum = b[*r++];
604     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
605     tmp[i] = sum;
606   }
607 
608   /* backward solve the upper triangular */
609   for (i=n-1; i>=0; i--){
610     v   = aa + a->diag[i] + 1;
611     vi  = aj + a->diag[i] + 1;
612     nz  = ai[i+1] - a->diag[i] - 1;
613     sum = tmp[i];
614     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
615     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
616   }
617 
618   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
619   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
620   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
621   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
622   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatMatSolve_SeqAIJ"
628 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
629 {
630   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
631   IS             iscol = a->col,isrow = a->row;
632   PetscErrorCode ierr;
633   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
634   PetscInt       nz,*rout,*cout,neq;
635   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
636 
637   PetscFunctionBegin;
638   if (!n) PetscFunctionReturn(0);
639 
640   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
641   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
642 
643   tmp  = a->solve_work;
644   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
645   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
646 
647   for (neq=0; neq<n; neq++){
648     /* forward solve the lower triangular */
649     tmp[0] = b[r[0]];
650     tmps   = tmp;
651     for (i=1; i<n; i++) {
652       v   = aa + ai[i] ;
653       vi  = aj + ai[i] ;
654       nz  = a->diag[i] - ai[i];
655       sum = b[r[i]];
656       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
657       tmp[i] = sum;
658     }
659     /* backward solve the upper triangular */
660     for (i=n-1; i>=0; i--){
661       v   = aa + a->diag[i] + 1;
662       vi  = aj + a->diag[i] + 1;
663       nz  = ai[i+1] - a->diag[i] - 1;
664       sum = tmp[i];
665       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
666       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
667     }
668 
669     b += n;
670     x += n;
671   }
672   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
673   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
674   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
675   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
676   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 /* ----------------------------------------------------------- */
681 #undef __FUNCT__
682 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
683 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
684 {
685   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
686   PetscErrorCode ierr;
687   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
688   PetscScalar    *x,*b,*aa = a->a;
689 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
690   PetscInt       adiag_i,i,*vi,nz,ai_i;
691   PetscScalar    *v,sum;
692 #endif
693 
694   PetscFunctionBegin;
695   if (!n) PetscFunctionReturn(0);
696 
697   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
698   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
699 
700 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
701   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
702 #else
703   /* forward solve the lower triangular */
704   x[0] = b[0];
705   for (i=1; i<n; i++) {
706     ai_i = ai[i];
707     v    = aa + ai_i;
708     vi   = aj + ai_i;
709     nz   = adiag[i] - ai_i;
710     sum  = b[i];
711     while (nz--) sum -= *v++ * x[*vi++];
712     x[i] = sum;
713   }
714 
715   /* backward solve the upper triangular */
716   for (i=n-1; i>=0; i--){
717     adiag_i = adiag[i];
718     v       = aa + adiag_i + 1;
719     vi      = aj + adiag_i + 1;
720     nz      = ai[i+1] - adiag_i - 1;
721     sum     = x[i];
722     while (nz--) sum -= *v++ * x[*vi++];
723     x[i]    = sum*aa[adiag_i];
724   }
725 #endif
726   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
727   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
728   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
734 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
735 {
736   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
737   IS             iscol = a->col,isrow = a->row;
738   PetscErrorCode ierr;
739   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
740   PetscInt       nz,*rout,*cout;
741   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
742 
743   PetscFunctionBegin;
744   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
745 
746   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
747   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
748   tmp  = a->solve_work;
749 
750   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
751   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
752 
753   /* forward solve the lower triangular */
754   tmp[0] = b[*r++];
755   for (i=1; i<n; i++) {
756     v   = aa + ai[i] ;
757     vi  = aj + ai[i] ;
758     nz  = a->diag[i] - ai[i];
759     sum = b[*r++];
760     while (nz--) sum -= *v++ * tmp[*vi++ ];
761     tmp[i] = sum;
762   }
763 
764   /* backward solve the upper triangular */
765   for (i=n-1; i>=0; i--){
766     v   = aa + a->diag[i] + 1;
767     vi  = aj + a->diag[i] + 1;
768     nz  = ai[i+1] - a->diag[i] - 1;
769     sum = tmp[i];
770     while (nz--) sum -= *v++ * tmp[*vi++ ];
771     tmp[i] = sum*aa[a->diag[i]];
772     x[*c--] += tmp[i];
773   }
774 
775   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
776   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
777   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
778   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
779   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
780 
781   PetscFunctionReturn(0);
782 }
783 /* -------------------------------------------------------------------*/
784 #undef __FUNCT__
785 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
786 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
787 {
788   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
789   IS             iscol = a->col,isrow = a->row;
790   PetscErrorCode ierr;
791   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
792   PetscInt       nz,*rout,*cout,*diag = a->diag;
793   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
794 
795   PetscFunctionBegin;
796   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
797   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
798   tmp  = a->solve_work;
799 
800   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
801   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
802 
803   /* copy the b into temp work space according to permutation */
804   for (i=0; i<n; i++) tmp[i] = b[c[i]];
805 
806   /* forward solve the U^T */
807   for (i=0; i<n; i++) {
808     v   = aa + diag[i] ;
809     vi  = aj + diag[i] + 1;
810     nz  = ai[i+1] - diag[i] - 1;
811     s1  = tmp[i];
812     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
813     while (nz--) {
814       tmp[*vi++ ] -= (*v++)*s1;
815     }
816     tmp[i] = s1;
817   }
818 
819   /* backward solve the L^T */
820   for (i=n-1; i>=0; i--){
821     v   = aa + diag[i] - 1 ;
822     vi  = aj + diag[i] - 1 ;
823     nz  = diag[i] - ai[i];
824     s1  = tmp[i];
825     while (nz--) {
826       tmp[*vi-- ] -= (*v--)*s1;
827     }
828   }
829 
830   /* copy tmp into x according to permutation */
831   for (i=0; i<n; i++) x[r[i]] = tmp[i];
832 
833   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
834   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
835   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
836   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
837 
838   ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr);
839   PetscFunctionReturn(0);
840 }
841 
842 #undef __FUNCT__
843 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
844 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
845 {
846   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
847   IS             iscol = a->col,isrow = a->row;
848   PetscErrorCode ierr;
849   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
850   PetscInt       nz,*rout,*cout,*diag = a->diag;
851   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
852 
853   PetscFunctionBegin;
854   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
855 
856   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
857   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
858   tmp = a->solve_work;
859 
860   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
861   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
862 
863   /* copy the b into temp work space according to permutation */
864   for (i=0; i<n; i++) tmp[i] = b[c[i]];
865 
866   /* forward solve the U^T */
867   for (i=0; i<n; i++) {
868     v   = aa + diag[i] ;
869     vi  = aj + diag[i] + 1;
870     nz  = ai[i+1] - diag[i] - 1;
871     tmp[i] *= *v++;
872     while (nz--) {
873       tmp[*vi++ ] -= (*v++)*tmp[i];
874     }
875   }
876 
877   /* backward solve the L^T */
878   for (i=n-1; i>=0; i--){
879     v   = aa + diag[i] - 1 ;
880     vi  = aj + diag[i] - 1 ;
881     nz  = diag[i] - ai[i];
882     while (nz--) {
883       tmp[*vi-- ] -= (*v--)*tmp[i];
884     }
885   }
886 
887   /* copy tmp into x according to permutation */
888   for (i=0; i<n; i++) x[r[i]] += tmp[i];
889 
890   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
891   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
892   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
893   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
894 
895   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
896   PetscFunctionReturn(0);
897 }
898 /* ----------------------------------------------------------------*/
899 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
900 
901 #undef __FUNCT__
902 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
903 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
904 {
905   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
906   IS                 isicol;
907   PetscErrorCode     ierr;
908   PetscInt           *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
909   PetscInt           *bi,*cols,nnz,*cols_lvl;
910   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
911   PetscInt           i,levels,diagonal_fill;
912   PetscTruth         col_identity,row_identity;
913   PetscReal          f;
914   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
915   PetscBT            lnkbt;
916   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
917   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
918   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
919 
920   PetscFunctionBegin;
921   f             = info->fill;
922   levels        = (PetscInt)info->levels;
923   diagonal_fill = (PetscInt)info->diagonal_fill;
924   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
925 
926   /* special case that simply copies fill pattern */
927   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
928   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
929   if (!levels && row_identity && col_identity) {
930     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
931     (*fact)->factor = FACTOR_LU;
932     b               = (Mat_SeqAIJ*)(*fact)->data;
933     if (!b->diag) {
934       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
935     }
936     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
937     b->row              = isrow;
938     b->col              = iscol;
939     b->icol             = isicol;
940     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
941     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
942     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
943     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
944     PetscFunctionReturn(0);
945   }
946 
947   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
948   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
949 
950   /* get new row pointers */
951   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
952   bi[0] = 0;
953   /* bdiag is location of diagonal in factor */
954   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
955   bdiag[0]  = 0;
956 
957   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
958   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
959 
960   /* create a linked list for storing column indices of the active row */
961   nlnk = n + 1;
962   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
963 
964   /* initial FreeSpace size is f*(ai[n]+1) */
965   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
966   current_space = free_space;
967   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
968   current_space_lvl = free_space_lvl;
969 
970   for (i=0; i<n; i++) {
971     nzi = 0;
972     /* copy current row into linked list */
973     nnz  = ai[r[i]+1] - ai[r[i]];
974     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
975     cols = aj + ai[r[i]];
976     lnk[i] = -1; /* marker to indicate if diagonal exists */
977     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
978     nzi += nlnk;
979 
980     /* make sure diagonal entry is included */
981     if (diagonal_fill && lnk[i] == -1) {
982       fm = n;
983       while (lnk[fm] < i) fm = lnk[fm];
984       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
985       lnk[fm]    = i;
986       lnk_lvl[i] = 0;
987       nzi++; dcount++;
988     }
989 
990     /* add pivot rows into the active row */
991     nzbd = 0;
992     prow = lnk[n];
993     while (prow < i) {
994       nnz      = bdiag[prow];
995       cols     = bj_ptr[prow] + nnz + 1;
996       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
997       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
998       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
999       nzi += nlnk;
1000       prow = lnk[prow];
1001       nzbd++;
1002     }
1003     bdiag[i] = nzbd;
1004     bi[i+1]  = bi[i] + nzi;
1005 
1006     /* if free space is not available, make more free space */
1007     if (current_space->local_remaining<nzi) {
1008       nnz = nzi*(n - i); /* estimated and max additional space needed */
1009       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1010       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1011       reallocs++;
1012     }
1013 
1014     /* copy data into free_space and free_space_lvl, then initialize lnk */
1015     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1016     bj_ptr[i]    = current_space->array;
1017     bjlvl_ptr[i] = current_space_lvl->array;
1018 
1019     /* make sure the active row i has diagonal entry */
1020     if (*(bj_ptr[i]+bdiag[i]) != i) {
1021       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1022     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
1023     }
1024 
1025     current_space->array           += nzi;
1026     current_space->local_used      += nzi;
1027     current_space->local_remaining -= nzi;
1028     current_space_lvl->array           += nzi;
1029     current_space_lvl->local_used      += nzi;
1030     current_space_lvl->local_remaining -= nzi;
1031   }
1032 
1033   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1034   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1035 
1036   /* destroy list of free space and other temporary arrays */
1037   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1038   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1039   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1040   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1041   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1042 
1043 #if defined(PETSC_USE_VERBOSE)
1044   {
1045     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1046     ierr = PetscVerboseInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
1047     ierr = PetscVerboseInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr);
1048     ierr = PetscVerboseInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr);
1049     ierr = PetscVerboseInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
1050     if (diagonal_fill) {
1051       ierr = PetscVerboseInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr);
1052     }
1053   }
1054 #endif
1055 
1056   /* put together the new matrix */
1057   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
1058   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
1059   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1060   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1061   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1062   b = (Mat_SeqAIJ*)(*fact)->data;
1063   b->freedata     = PETSC_TRUE;
1064   b->singlemalloc = PETSC_FALSE;
1065   len = (bi[n] )*sizeof(PetscScalar);
1066   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1067   b->j          = bj;
1068   b->i          = bi;
1069   for (i=0; i<n; i++) bdiag[i] += bi[i];
1070   b->diag       = bdiag;
1071   b->ilen       = 0;
1072   b->imax       = 0;
1073   b->row        = isrow;
1074   b->col        = iscol;
1075   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1076   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1077   b->icol       = isicol;
1078   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1079   /* In b structure:  Free imax, ilen, old a, old j.
1080      Allocate bdiag, solve_work, new a, new j */
1081   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1082   b->maxnz             = b->nz = bi[n] ;
1083   (*fact)->factor = FACTOR_LU;
1084   (*fact)->info.factor_mallocs    = reallocs;
1085   (*fact)->info.fill_ratio_given  = f;
1086   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1087 
1088   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1089   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1090 
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 #include "src/mat/impls/sbaij/seq/sbaij.h"
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1097 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1098 {
1099   Mat            C = *B;
1100   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1101   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1102   IS             ip=b->row;
1103   PetscErrorCode ierr;
1104   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1105   PetscInt       *ai=a->i,*aj=a->j;
1106   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1107   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1108   PetscReal      zeropivot,rs,shiftnz;
1109   PetscReal      shiftpd;
1110   ChShift_Ctx    sctx;
1111   PetscInt       newshift;
1112 
1113   PetscFunctionBegin;
1114   shiftnz   = info->shiftnz;
1115   shiftpd   = info->shiftpd;
1116   zeropivot = info->zeropivot;
1117 
1118   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1119 
1120   /* initialization */
1121   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1122   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1123   jl   = il + mbs;
1124   rtmp = (MatScalar*)(jl + mbs);
1125 
1126   sctx.shift_amount = 0;
1127   sctx.nshift       = 0;
1128   do {
1129     sctx.chshift = PETSC_FALSE;
1130     for (i=0; i<mbs; i++) {
1131       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1132     }
1133 
1134     for (k = 0; k<mbs; k++){
1135       bval = ba + bi[k];
1136       /* initialize k-th row by the perm[k]-th row of A */
1137       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1138       for (j = jmin; j < jmax; j++){
1139         col = rip[aj[j]];
1140         if (col >= k){ /* only take upper triangular entry */
1141           rtmp[col] = aa[j];
1142           *bval++  = 0.0; /* for in-place factorization */
1143         }
1144       }
1145       /* shift the diagonal of the matrix */
1146       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1147 
1148       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1149       dk = rtmp[k];
1150       i = jl[k]; /* first row to be added to k_th row  */
1151 
1152       while (i < k){
1153         nexti = jl[i]; /* next row to be added to k_th row */
1154 
1155         /* compute multiplier, update diag(k) and U(i,k) */
1156         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1157         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1158         dk += uikdi*ba[ili];
1159         ba[ili] = uikdi; /* -U(i,k) */
1160 
1161         /* add multiple of row i to k-th row */
1162         jmin = ili + 1; jmax = bi[i+1];
1163         if (jmin < jmax){
1164           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1165           /* update il and jl for row i */
1166           il[i] = jmin;
1167           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1168         }
1169         i = nexti;
1170       }
1171 
1172       /* shift the diagonals when zero pivot is detected */
1173       /* compute rs=sum of abs(off-diagonal) */
1174       rs   = 0.0;
1175       jmin = bi[k]+1;
1176       nz   = bi[k+1] - jmin;
1177       if (nz){
1178         bcol = bj + jmin;
1179         while (nz--){
1180           rs += PetscAbsScalar(rtmp[*bcol]);
1181           bcol++;
1182         }
1183       }
1184 
1185       sctx.rs = rs;
1186       sctx.pv = dk;
1187       ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
1188       if (newshift == 1){
1189         break;    /* sctx.shift_amount is updated */
1190       } else if (newshift == -1){
1191         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1192       }
1193 
1194       /* copy data into U(k,:) */
1195       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1196       jmin = bi[k]+1; jmax = bi[k+1];
1197       if (jmin < jmax) {
1198         for (j=jmin; j<jmax; j++){
1199           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1200         }
1201         /* add the k-th row into il and jl */
1202         il[k] = jmin;
1203         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1204       }
1205     }
1206   } while (sctx.chshift);
1207   ierr = PetscFree(il);CHKERRQ(ierr);
1208 
1209   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1210   C->factor       = FACTOR_CHOLESKY;
1211   C->assembled    = PETSC_TRUE;
1212   C->preallocated = PETSC_TRUE;
1213   ierr = PetscLogFlops(C->m);CHKERRQ(ierr);
1214   if (sctx.nshift){
1215     if (shiftnz) {
1216       ierr = PetscVerboseInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
1217     } else if (shiftpd) {
1218       ierr = PetscVerboseInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
1219     }
1220   }
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 #undef __FUNCT__
1225 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1226 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1227 {
1228   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1229   Mat_SeqSBAIJ       *b;
1230   Mat                B;
1231   PetscErrorCode     ierr;
1232   PetscTruth         perm_identity;
1233   PetscInt           reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1234   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1235   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1236   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1237   PetscReal          fill=info->fill,levels=info->levels;
1238   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1239   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1240   PetscBT            lnkbt;
1241 
1242   PetscFunctionBegin;
1243   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1244   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1245 
1246   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1247   ui[0] = 0;
1248 
1249   /* special case that simply copies fill pattern */
1250   if (!levels && perm_identity) {
1251     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1252     for (i=0; i<am; i++) {
1253       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1254     }
1255     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1256     cols = uj;
1257     for (i=0; i<am; i++) {
1258       aj    = a->j + a->diag[i];
1259       ncols = ui[i+1] - ui[i];
1260       for (j=0; j<ncols; j++) *cols++ = *aj++;
1261     }
1262   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1263     /* initialization */
1264     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1265 
1266     /* jl: linked list for storing indices of the pivot rows
1267        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1268     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1269     il         = jl + am;
1270     uj_ptr     = (PetscInt**)(il + am);
1271     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1272     for (i=0; i<am; i++){
1273       jl[i] = am; il[i] = 0;
1274     }
1275 
1276     /* create and initialize a linked list for storing column indices of the active row k */
1277     nlnk = am + 1;
1278     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1279 
1280     /* initial FreeSpace size is fill*(ai[am]+1) */
1281     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1282     current_space = free_space;
1283     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1284     current_space_lvl = free_space_lvl;
1285 
1286     for (k=0; k<am; k++){  /* for each active row k */
1287       /* initialize lnk by the column indices of row rip[k] of A */
1288       nzk   = 0;
1289       ncols = ai[rip[k]+1] - ai[rip[k]];
1290       ncols_upper = 0;
1291       for (j=0; j<ncols; j++){
1292         i = *(aj + ai[rip[k]] + j);
1293         if (rip[i] >= k){ /* only take upper triangular entry */
1294           ajtmp[ncols_upper] = i;
1295           ncols_upper++;
1296         }
1297       }
1298       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1299       nzk += nlnk;
1300 
1301       /* update lnk by computing fill-in for each pivot row to be merged in */
1302       prow = jl[k]; /* 1st pivot row */
1303 
1304       while (prow < k){
1305         nextprow = jl[prow];
1306 
1307         /* merge prow into k-th row */
1308         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1309         jmax = ui[prow+1];
1310         ncols = jmax-jmin;
1311         i     = jmin - ui[prow];
1312         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1313         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1314         j     = *(uj - 1);
1315         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1316         nzk += nlnk;
1317 
1318         /* update il and jl for prow */
1319         if (jmin < jmax){
1320           il[prow] = jmin;
1321           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1322         }
1323         prow = nextprow;
1324       }
1325 
1326       /* if free space is not available, make more free space */
1327       if (current_space->local_remaining<nzk) {
1328         i = am - k + 1; /* num of unfactored rows */
1329         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1330         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1331         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1332         reallocs++;
1333       }
1334 
1335       /* copy data into free_space and free_space_lvl, then initialize lnk */
1336       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1337 
1338       /* add the k-th row into il and jl */
1339       if (nzk > 1){
1340         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1341         jl[k] = jl[i]; jl[i] = k;
1342         il[k] = ui[k] + 1;
1343       }
1344       uj_ptr[k]     = current_space->array;
1345       uj_lvl_ptr[k] = current_space_lvl->array;
1346 
1347       current_space->array           += nzk;
1348       current_space->local_used      += nzk;
1349       current_space->local_remaining -= nzk;
1350 
1351       current_space_lvl->array           += nzk;
1352       current_space_lvl->local_used      += nzk;
1353       current_space_lvl->local_remaining -= nzk;
1354 
1355       ui[k+1] = ui[k] + nzk;
1356     }
1357 
1358 #if defined(PETSC_USE_VERBOSE)
1359     if (ai[am] != 0) {
1360       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1361       ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr);
1362       ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr);
1363       ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr);
1364     } else {
1365       ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1366     }
1367 #endif
1368 
1369     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1370     ierr = PetscFree(jl);CHKERRQ(ierr);
1371     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1372 
1373     /* destroy list of free space and other temporary array(s) */
1374     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1375     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1376     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1377     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1378 
1379   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1380 
1381   /* put together the new matrix in MATSEQSBAIJ format */
1382   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1383   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1384   B = *fact;
1385   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1386   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1387 
1388   b    = (Mat_SeqSBAIJ*)B->data;
1389   b->singlemalloc = PETSC_FALSE;
1390   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1391   b->j    = uj;
1392   b->i    = ui;
1393   b->diag = 0;
1394   b->ilen = 0;
1395   b->imax = 0;
1396   b->row  = perm;
1397   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1398   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1399   b->icol = perm;
1400   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1401   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1402   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1403   b->maxnz = b->nz = ui[am];
1404 
1405   B->factor                 = FACTOR_CHOLESKY;
1406   B->info.factor_mallocs    = reallocs;
1407   B->info.fill_ratio_given  = fill;
1408   if (ai[am] != 0) {
1409     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1410   } else {
1411     B->info.fill_ratio_needed = 0.0;
1412   }
1413   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1414   if (perm_identity){
1415     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1416     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1423 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1424 {
1425   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1426   Mat_SeqSBAIJ       *b;
1427   Mat                B;
1428   PetscErrorCode     ierr;
1429   PetscTruth         perm_identity;
1430   PetscReal          fill = info->fill;
1431   PetscInt           *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1432   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1433   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1434   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1435   PetscBT            lnkbt;
1436   IS                 iperm;
1437 
1438   PetscFunctionBegin;
1439   /* check whether perm is the identity mapping */
1440   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1441   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1442 
1443   if (!perm_identity){
1444     /* check if perm is symmetric! */
1445     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1446     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1447     for (i=0; i<am; i++) {
1448       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1449     }
1450     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1451     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1452   }
1453 
1454   /* initialization */
1455   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1456   ui[0] = 0;
1457 
1458   /* jl: linked list for storing indices of the pivot rows
1459      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1460   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1461   il     = jl + am;
1462   cols   = il + am;
1463   ui_ptr = (PetscInt**)(cols + am);
1464   for (i=0; i<am; i++){
1465     jl[i] = am; il[i] = 0;
1466   }
1467 
1468   /* create and initialize a linked list for storing column indices of the active row k */
1469   nlnk = am + 1;
1470   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1471 
1472   /* initial FreeSpace size is fill*(ai[am]+1) */
1473   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1474   current_space = free_space;
1475 
1476   for (k=0; k<am; k++){  /* for each active row k */
1477     /* initialize lnk by the column indices of row rip[k] of A */
1478     nzk   = 0;
1479     ncols = ai[rip[k]+1] - ai[rip[k]];
1480     ncols_upper = 0;
1481     for (j=0; j<ncols; j++){
1482       i = rip[*(aj + ai[rip[k]] + j)];
1483       if (i >= k){ /* only take upper triangular entry */
1484         cols[ncols_upper] = i;
1485         ncols_upper++;
1486       }
1487     }
1488     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1489     nzk += nlnk;
1490 
1491     /* update lnk by computing fill-in for each pivot row to be merged in */
1492     prow = jl[k]; /* 1st pivot row */
1493 
1494     while (prow < k){
1495       nextprow = jl[prow];
1496       /* merge prow into k-th row */
1497       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1498       jmax = ui[prow+1];
1499       ncols = jmax-jmin;
1500       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1501       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1502       nzk += nlnk;
1503 
1504       /* update il and jl for prow */
1505       if (jmin < jmax){
1506         il[prow] = jmin;
1507         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1508       }
1509       prow = nextprow;
1510     }
1511 
1512     /* if free space is not available, make more free space */
1513     if (current_space->local_remaining<nzk) {
1514       i = am - k + 1; /* num of unfactored rows */
1515       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1516       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1517       reallocs++;
1518     }
1519 
1520     /* copy data into free space, then initialize lnk */
1521     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1522 
1523     /* add the k-th row into il and jl */
1524     if (nzk-1 > 0){
1525       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1526       jl[k] = jl[i]; jl[i] = k;
1527       il[k] = ui[k] + 1;
1528     }
1529     ui_ptr[k] = current_space->array;
1530     current_space->array           += nzk;
1531     current_space->local_used      += nzk;
1532     current_space->local_remaining -= nzk;
1533 
1534     ui[k+1] = ui[k] + nzk;
1535   }
1536 
1537 #if defined(PETSC_USE_VERBOSE)
1538   if (ai[am] != 0) {
1539     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1540     ierr = PetscVerboseInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr);
1541     ierr = PetscVerboseInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr);
1542     ierr = PetscVerboseInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr);
1543   } else {
1544      ierr = PetscVerboseInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1545   }
1546 #endif
1547 
1548   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1549   ierr = PetscFree(jl);CHKERRQ(ierr);
1550 
1551   /* destroy list of free space and other temporary array(s) */
1552   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1553   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1554   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1555 
1556   /* put together the new matrix in MATSEQSBAIJ format */
1557   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1558   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1559   B    = *fact;
1560   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1561   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1562 
1563   b = (Mat_SeqSBAIJ*)B->data;
1564   b->singlemalloc = PETSC_FALSE;
1565   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1566   b->j    = uj;
1567   b->i    = ui;
1568   b->diag = 0;
1569   b->ilen = 0;
1570   b->imax = 0;
1571   b->row  = perm;
1572   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1573   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1574   b->icol = perm;
1575   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1576   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1577   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1578   b->maxnz = b->nz = ui[am];
1579 
1580   B->factor                 = FACTOR_CHOLESKY;
1581   B->info.factor_mallocs    = reallocs;
1582   B->info.fill_ratio_given  = fill;
1583   if (ai[am] != 0) {
1584     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1585   } else {
1586     B->info.fill_ratio_needed = 0.0;
1587   }
1588   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1589   if (perm_identity){
1590     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1591     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1592   }
1593   PetscFunctionReturn(0);
1594 }
1595