xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 1d2e40055fe3bf783df36eeeccf2d7d5fcfab0fd)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
23 
24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
28 #endif
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
32   /* ------------------------------------------------------------
33 
34           This interface was contribed by Tony Caola
35 
36      This routine is an interface to the pivoting drop-tolerance
37      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
38      SPARSEKIT2.
39 
40      The SPARSEKIT2 routines used here are covered by the GNU
41      copyright; see the file gnu in this directory.
42 
43      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
44      help in getting this routine ironed out.
45 
46      The major drawback to this routine is that if info->fill is
47      not large enough it fails rather than allocating more space;
48      this can be fixed by hacking/improving the f2c version of
49      Yousef Saad's code.
50 
51      ------------------------------------------------------------
52 */
53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
54 {
55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
56   PetscFunctionBegin;
57   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
58   You can obtain the drop tolerance routines by installing PETSc from\n\
59   www.mcs.anl.gov/petsc\n");
60 #else
61   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
62   IS             iscolf,isicol,isirow;
63   PetscTruth     reorder;
64   PetscErrorCode ierr,sierr;
65   PetscInt       *c,*r,*ic,i,n = A->rmap.n;
66   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
67   PetscInt       *ordcol,*iwk,*iperm,*jw;
68   PetscInt       jmax,lfill,job,*o_i,*o_j;
69   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
70   PetscReal      af;
71 
72   PetscFunctionBegin;
73 
74   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
75   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
76   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
77   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
78   lfill   = (PetscInt)(info->dtcount/2.0);
79   jmax    = (PetscInt)(info->fill*a->nz);
80 
81 
82   /* ------------------------------------------------------------
83      If reorder=.TRUE., then the original matrix has to be
84      reordered to reflect the user selected ordering scheme, and
85      then de-reordered so it is in it's original format.
86      Because Saad's dperm() is NOT in place, we have to copy
87      the original matrix and allocate more storage. . .
88      ------------------------------------------------------------
89   */
90 
91   /* set reorder to true if either isrow or iscol is not identity */
92   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
93   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
94   reorder = PetscNot(reorder);
95 
96 
97   /* storage for ilu factor */
98   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
100   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
101   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
102 
103   /* ------------------------------------------------------------
104      Make sure that everything is Fortran formatted (1-Based)
105      ------------------------------------------------------------
106   */
107   for (i=old_i[0];i<old_i[n];i++) {
108     old_j[i]++;
109   }
110   for(i=0;i<n+1;i++) {
111     old_i[i]++;
112   };
113 
114 
115   if (reorder) {
116     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
117     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       r[i]  = r[i]+1;
120       c[i]  = c[i]+1;
121     }
122     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
123     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
124     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
125     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
126     for (i=0;i<n;i++) {
127       r[i]  = r[i]-1;
128       c[i]  = c[i]-1;
129     }
130     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
131     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
132     o_a = old_a2;
133     o_j = old_j2;
134     o_i = old_i2;
135   } else {
136     o_a = old_a;
137     o_j = old_j;
138     o_i = old_i;
139   }
140 
141   /* ------------------------------------------------------------
142      Call Saad's ilutp() routine to generate the factorization
143      ------------------------------------------------------------
144   */
145 
146   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
147   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
148   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
149 
150   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
151   if (sierr) {
152     switch (sierr) {
153       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
154       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
155       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
156       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
157       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
158       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
159     }
160   }
161 
162   ierr = PetscFree(w);CHKERRQ(ierr);
163   ierr = PetscFree(jw);CHKERRQ(ierr);
164 
165   /* ------------------------------------------------------------
166      Saad's routine gives the result in Modified Sparse Row (msr)
167      Convert to Compressed Sparse Row format (csr)
168      ------------------------------------------------------------
169   */
170 
171   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
172   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
173 
174   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
175 
176   ierr = PetscFree(iwk);CHKERRQ(ierr);
177   ierr = PetscFree(wk);CHKERRQ(ierr);
178 
179   if (reorder) {
180     ierr = PetscFree(old_a2);CHKERRQ(ierr);
181     ierr = PetscFree(old_j2);CHKERRQ(ierr);
182     ierr = PetscFree(old_i2);CHKERRQ(ierr);
183   } else {
184     /* fix permutation of old_j that the factorization introduced */
185     for (i=old_i[0]; i<old_i[n]; i++) {
186       old_j[i-1] = iperm[old_j[i-1]-1];
187     }
188   }
189 
190   /* get rid of the shift to indices starting at 1 */
191   for (i=0; i<n+1; i++) {
192     old_i[i]--;
193   }
194   for (i=old_i[0];i<old_i[n];i++) {
195     old_j[i]--;
196   }
197 
198   /* Make the factored matrix 0-based */
199   for (i=0; i<n+1; i++) {
200     new_i[i]--;
201   }
202   for (i=new_i[0];i<new_i[n];i++) {
203     new_j[i]--;
204   }
205 
206   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
207   /*-- permute the right-hand-side and solution vectors           --*/
208   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
209   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
210   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
211   for(i=0; i<n; i++) {
212     ordcol[i] = ic[iperm[i]-1];
213   };
214   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
215   ierr = ISDestroy(isicol);CHKERRQ(ierr);
216 
217   ierr = PetscFree(iperm);CHKERRQ(ierr);
218 
219   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
220   ierr = PetscFree(ordcol);CHKERRQ(ierr);
221 
222   /*----- put together the new matrix -----*/
223 
224   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
225   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
226   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
227   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
228   (*fact)->factor    = FACTOR_LU;
229   (*fact)->assembled = PETSC_TRUE;
230 
231   b = (Mat_SeqAIJ*)(*fact)->data;
232   b->freedata      = PETSC_TRUE;
233   b->sorted        = PETSC_FALSE;
234   b->singlemalloc  = PETSC_FALSE;
235   b->a             = new_a;
236   b->j             = new_j;
237   b->i             = new_i;
238   b->ilen          = 0;
239   b->imax          = 0;
240   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241   b->row           = isirow;
242   b->col           = iscolf;
243   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
244   b->maxnz = b->nz = new_i[n];
245   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
246   (*fact)->info.factor_mallocs = 0;
247 
248   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
249 
250   af = ((double)b->nz)/((double)a->nz) + .001;
251   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr);
252   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
253   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
254   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
255 
256   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
257 
258   PetscFunctionReturn(0);
259 #endif
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
265 {
266   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
267   IS                 isicol;
268   PetscErrorCode     ierr;
269   PetscInt           *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
270   PetscInt           *bi,*bj,*ajtmp;
271   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
272   PetscReal          f;
273   PetscInt           nlnk,*lnk,k,**bi_ptr;
274   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
275   PetscBT            lnkbt;
276 
277   PetscFunctionBegin;
278   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
280   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
281   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
282 
283   /* get new row pointers */
284   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
285   bi[0] = 0;
286 
287   /* bdiag is location of diagonal in factor */
288   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
289   bdiag[0] = 0;
290 
291   /* linked list for storing column indices of the active row */
292   nlnk = n + 1;
293   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
294 
295   ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr);
296   bi_ptr = (PetscInt**)(im + n);
297 
298   /* initial FreeSpace size is f*(ai[n]+1) */
299   f = info->fill;
300   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
301   current_space = free_space;
302 
303   for (i=0; i<n; i++) {
304     /* copy previous fill into linked list */
305     nzi = 0;
306     nnz = ai[r[i]+1] - ai[r[i]];
307     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
308     ajtmp = aj + ai[r[i]];
309     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
310     nzi += nlnk;
311 
312     /* add pivot rows into linked list */
313     row = lnk[n];
314     while (row < i) {
315       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
316       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
317       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
318       nzi += nlnk;
319       row  = lnk[row];
320     }
321     bi[i+1] = bi[i] + nzi;
322     im[i]   = nzi;
323 
324     /* mark bdiag */
325     nzbd = 0;
326     nnz  = nzi;
327     k    = lnk[n];
328     while (nnz-- && k < i){
329       nzbd++;
330       k = lnk[k];
331     }
332     bdiag[i] = bi[i] + nzbd;
333 
334     /* if free space is not available, make more free space */
335     if (current_space->local_remaining<nzi) {
336       nnz = (n - i)*nzi; /* estimated and max additional space needed */
337       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
338       reallocs++;
339     }
340 
341     /* copy data into free space, then initialize lnk */
342     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
343     bi_ptr[i] = current_space->array;
344     current_space->array           += nzi;
345     current_space->local_used      += nzi;
346     current_space->local_remaining -= nzi;
347   }
348 #if defined(PETSC_USE_INFO)
349   if (ai[n] != 0) {
350     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
351     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
352     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
353     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
354     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
355   } else {
356     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
357   }
358 #endif
359 
360   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
361   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
362 
363   /* destroy list of free space and other temporary array(s) */
364   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
365   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
366   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
367   ierr = PetscFree(im);CHKERRQ(ierr);
368 
369   /* put together the new matrix */
370   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
371   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
372   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
373   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
374   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
375   b    = (Mat_SeqAIJ*)(*B)->data;
376   b->freedata     = PETSC_TRUE;
377   b->singlemalloc = PETSC_FALSE;
378   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
379   b->j          = bj;
380   b->i          = bi;
381   b->diag       = bdiag;
382   b->ilen       = 0;
383   b->imax       = 0;
384   b->row        = isrow;
385   b->col        = iscol;
386   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
387   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
388   b->icol       = isicol;
389   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
390 
391   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
392   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
393   b->maxnz = b->nz = bi[n] ;
394 
395   (*B)->factor                 =  FACTOR_LU;
396   (*B)->info.factor_mallocs    = reallocs;
397   (*B)->info.fill_ratio_given  = f;
398 
399   if (ai[n] != 0) {
400     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
401   } else {
402     (*B)->info.fill_ratio_needed = 0.0;
403   }
404   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
405   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
406   PetscFunctionReturn(0);
407 }
408 
409 /* ----------------------------------------------------------- */
410 #undef __FUNCT__
411 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
412 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
413 {
414   Mat            C=*B;
415   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
416   IS             isrow = b->row,isicol = b->icol;
417   PetscErrorCode ierr;
418   PetscInt       *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
419   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
420   PetscInt       *diag_offset = b->diag,diag,*pj;
421   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
422   PetscScalar    d;
423   PetscReal      rs;
424   LUShift_Ctx    sctx;
425   PetscInt       newshift;
426 
427   PetscFunctionBegin;
428   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
429   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
430   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
431   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
432   rtmps = rtmp; ics = ic;
433 
434   sctx.shift_top  = 0;
435   sctx.nshift_max = 0;
436   sctx.shift_lo   = 0;
437   sctx.shift_hi   = 0;
438 
439   if (!a->diag) {
440     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
441   }
442   /* if both shift schemes are chosen by user, only use info->shiftpd */
443   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
444   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
445     PetscInt *aai = a->i,*ddiag = a->diag;
446     sctx.shift_top = 0;
447     for (i=0; i<n; i++) {
448       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
449       d  = (a->a)[ddiag[i]];
450       rs = -PetscAbsScalar(d) - PetscRealPart(d);
451       v  = a->a+aai[i];
452       nz = aai[i+1] - aai[i];
453       for (j=0; j<nz; j++)
454 	rs += PetscAbsScalar(v[j]);
455       if (rs>sctx.shift_top) sctx.shift_top = rs;
456     }
457     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
458     sctx.shift_top    *= 1.1;
459     sctx.nshift_max   = 5;
460     sctx.shift_lo     = 0.;
461     sctx.shift_hi     = 1.;
462   }
463 
464   sctx.shift_amount = 0;
465   sctx.nshift       = 0;
466   do {
467     sctx.lushift = PETSC_FALSE;
468     for (i=0; i<n; i++){
469       nz    = bi[i+1] - bi[i];
470       bjtmp = bj + bi[i];
471       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
472 
473       /* load in initial (unfactored row) */
474       nz    = a->i[r[i]+1] - a->i[r[i]];
475       ajtmp = a->j + a->i[r[i]];
476       v     = a->a + a->i[r[i]];
477       for (j=0; j<nz; j++) {
478         rtmp[ics[ajtmp[j]]] = v[j];
479       }
480       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
481 
482       row = *bjtmp++;
483       while  (row < i) {
484         pc = rtmp + row;
485         if (*pc != 0.0) {
486           pv         = b->a + diag_offset[row];
487           pj         = b->j + diag_offset[row] + 1;
488           multiplier = *pc / *pv++;
489           *pc        = multiplier;
490           nz         = bi[row+1] - diag_offset[row] - 1;
491           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
492           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
493         }
494         row = *bjtmp++;
495       }
496       /* finished row so stick it into b->a */
497       pv   = b->a + bi[i] ;
498       pj   = b->j + bi[i] ;
499       nz   = bi[i+1] - bi[i];
500       diag = diag_offset[i] - bi[i];
501       rs   = 0.0;
502       for (j=0; j<nz; j++) {
503         pv[j] = rtmps[pj[j]];
504         if (j != diag) rs += PetscAbsScalar(pv[j]);
505       }
506 
507       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
508       sctx.rs  = rs;
509       sctx.pv  = pv[diag];
510       ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
511       if (newshift == 1){
512         break;    /* sctx.shift_amount is updated */
513       } else if (newshift == -1){
514         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %G tolerance %G * rs %G",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
515       }
516     }
517 
518     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
519       /*
520        * if no shift in this attempt & shifting & started shifting & can refine,
521        * then try lower shift
522        */
523       sctx.shift_hi        = info->shift_fraction;
524       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
525       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
526       sctx.lushift         = PETSC_TRUE;
527       sctx.nshift++;
528     }
529   } while (sctx.lushift);
530 
531   /* invert diagonal entries for simplier triangular solves */
532   for (i=0; i<n; i++) {
533     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
534   }
535 
536   ierr = PetscFree(rtmp);CHKERRQ(ierr);
537   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
538   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
539   C->factor = FACTOR_LU;
540   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
541   C->assembled = PETSC_TRUE;
542   ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr);
543   if (sctx.nshift){
544     if (info->shiftnz) {
545       ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
546     } else if (info->shiftpd) {
547       ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
548     }
549   }
550   PetscFunctionReturn(0);
551 }
552 
553 #undef __FUNCT__
554 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
555 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
556 {
557   PetscFunctionBegin;
558   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
559   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
560   PetscFunctionReturn(0);
561 }
562 
563 
564 /* ----------------------------------------------------------- */
565 #undef __FUNCT__
566 #define __FUNCT__ "MatLUFactor_SeqAIJ"
567 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
568 {
569   PetscErrorCode ierr;
570   Mat            C;
571 
572   PetscFunctionBegin;
573   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
574   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
575   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
576   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
577   PetscFunctionReturn(0);
578 }
579 /* ----------------------------------------------------------- */
580 #undef __FUNCT__
581 #define __FUNCT__ "MatSolve_SeqAIJ"
582 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
583 {
584   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
585   IS             iscol = a->col,isrow = a->row;
586   PetscErrorCode ierr;
587   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
588   PetscInt       nz,*rout,*cout;
589   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
590 
591   PetscFunctionBegin;
592   if (!n) PetscFunctionReturn(0);
593 
594   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
595   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
596   tmp  = a->solve_work;
597 
598   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
599   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
600 
601   /* forward solve the lower triangular */
602   tmp[0] = b[*r++];
603   tmps   = tmp;
604   for (i=1; i<n; i++) {
605     v   = aa + ai[i] ;
606     vi  = aj + ai[i] ;
607     nz  = a->diag[i] - ai[i];
608     sum = b[*r++];
609     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
610     tmp[i] = sum;
611   }
612 
613   /* backward solve the upper triangular */
614   for (i=n-1; i>=0; i--){
615     v   = aa + a->diag[i] + 1;
616     vi  = aj + a->diag[i] + 1;
617     nz  = ai[i+1] - a->diag[i] - 1;
618     sum = tmp[i];
619     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
620     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
621   }
622 
623   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
624   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
625   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
626   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
627   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatMatSolve_SeqAIJ"
633 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
634 {
635   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
636   IS             iscol = a->col,isrow = a->row;
637   PetscErrorCode ierr;
638   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
639   PetscInt       nz,*rout,*cout,neq;
640   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
641 
642   PetscFunctionBegin;
643   if (!n) PetscFunctionReturn(0);
644 
645   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
646   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
647 
648   tmp  = a->solve_work;
649   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
650   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
651 
652   for (neq=0; neq<n; neq++){
653     /* forward solve the lower triangular */
654     tmp[0] = b[r[0]];
655     tmps   = tmp;
656     for (i=1; i<n; i++) {
657       v   = aa + ai[i] ;
658       vi  = aj + ai[i] ;
659       nz  = a->diag[i] - ai[i];
660       sum = b[r[i]];
661       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
662       tmp[i] = sum;
663     }
664     /* backward solve the upper triangular */
665     for (i=n-1; i>=0; i--){
666       v   = aa + a->diag[i] + 1;
667       vi  = aj + a->diag[i] + 1;
668       nz  = ai[i+1] - a->diag[i] - 1;
669       sum = tmp[i];
670       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
671       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
672     }
673 
674     b += n;
675     x += n;
676   }
677   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
678   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
679   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
680   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
681   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 /* ----------------------------------------------------------- */
686 #undef __FUNCT__
687 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
688 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
689 {
690   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
691   PetscErrorCode ierr;
692   PetscInt       n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
693   PetscScalar    *x,*b,*aa = a->a;
694 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
695   PetscInt       adiag_i,i,*vi,nz,ai_i;
696   PetscScalar    *v,sum;
697 #endif
698 
699   PetscFunctionBegin;
700   if (!n) PetscFunctionReturn(0);
701 
702   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
703   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
704 
705 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
706   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
707 #else
708   /* forward solve the lower triangular */
709   x[0] = b[0];
710   for (i=1; i<n; i++) {
711     ai_i = ai[i];
712     v    = aa + ai_i;
713     vi   = aj + ai_i;
714     nz   = adiag[i] - ai_i;
715     sum  = b[i];
716     while (nz--) sum -= *v++ * x[*vi++];
717     x[i] = sum;
718   }
719 
720   /* backward solve the upper triangular */
721   for (i=n-1; i>=0; i--){
722     adiag_i = adiag[i];
723     v       = aa + adiag_i + 1;
724     vi      = aj + adiag_i + 1;
725     nz      = ai[i+1] - adiag_i - 1;
726     sum     = x[i];
727     while (nz--) sum -= *v++ * x[*vi++];
728     x[i]    = sum*aa[adiag_i];
729   }
730 #endif
731   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
732   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
733   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
734   PetscFunctionReturn(0);
735 }
736 
737 #undef __FUNCT__
738 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
739 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
740 {
741   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
742   IS             iscol = a->col,isrow = a->row;
743   PetscErrorCode ierr;
744   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
745   PetscInt       nz,*rout,*cout;
746   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
747 
748   PetscFunctionBegin;
749   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
750 
751   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
752   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
753   tmp  = a->solve_work;
754 
755   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
756   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
757 
758   /* forward solve the lower triangular */
759   tmp[0] = b[*r++];
760   for (i=1; i<n; i++) {
761     v   = aa + ai[i] ;
762     vi  = aj + ai[i] ;
763     nz  = a->diag[i] - ai[i];
764     sum = b[*r++];
765     while (nz--) sum -= *v++ * tmp[*vi++ ];
766     tmp[i] = sum;
767   }
768 
769   /* backward solve the upper triangular */
770   for (i=n-1; i>=0; i--){
771     v   = aa + a->diag[i] + 1;
772     vi  = aj + a->diag[i] + 1;
773     nz  = ai[i+1] - a->diag[i] - 1;
774     sum = tmp[i];
775     while (nz--) sum -= *v++ * tmp[*vi++ ];
776     tmp[i] = sum*aa[a->diag[i]];
777     x[*c--] += tmp[i];
778   }
779 
780   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
781   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
782   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
783   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
784   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
785 
786   PetscFunctionReturn(0);
787 }
788 /* -------------------------------------------------------------------*/
789 #undef __FUNCT__
790 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
791 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
792 {
793   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
794   IS             iscol = a->col,isrow = a->row;
795   PetscErrorCode ierr;
796   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
797   PetscInt       nz,*rout,*cout,*diag = a->diag;
798   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
799 
800   PetscFunctionBegin;
801   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
802   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
803   tmp  = a->solve_work;
804 
805   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
806   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
807 
808   /* copy the b into temp work space according to permutation */
809   for (i=0; i<n; i++) tmp[i] = b[c[i]];
810 
811   /* forward solve the U^T */
812   for (i=0; i<n; i++) {
813     v   = aa + diag[i] ;
814     vi  = aj + diag[i] + 1;
815     nz  = ai[i+1] - diag[i] - 1;
816     s1  = tmp[i];
817     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
818     while (nz--) {
819       tmp[*vi++ ] -= (*v++)*s1;
820     }
821     tmp[i] = s1;
822   }
823 
824   /* backward solve the L^T */
825   for (i=n-1; i>=0; i--){
826     v   = aa + diag[i] - 1 ;
827     vi  = aj + diag[i] - 1 ;
828     nz  = diag[i] - ai[i];
829     s1  = tmp[i];
830     while (nz--) {
831       tmp[*vi-- ] -= (*v--)*s1;
832     }
833   }
834 
835   /* copy tmp into x according to permutation */
836   for (i=0; i<n; i++) x[r[i]] = tmp[i];
837 
838   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
839   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
840   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
841   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
842 
843   ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr);
844   PetscFunctionReturn(0);
845 }
846 
847 #undef __FUNCT__
848 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
849 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
850 {
851   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
852   IS             iscol = a->col,isrow = a->row;
853   PetscErrorCode ierr;
854   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
855   PetscInt       nz,*rout,*cout,*diag = a->diag;
856   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
857 
858   PetscFunctionBegin;
859   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
860 
861   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
862   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
863   tmp = a->solve_work;
864 
865   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
866   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
867 
868   /* copy the b into temp work space according to permutation */
869   for (i=0; i<n; i++) tmp[i] = b[c[i]];
870 
871   /* forward solve the U^T */
872   for (i=0; i<n; i++) {
873     v   = aa + diag[i] ;
874     vi  = aj + diag[i] + 1;
875     nz  = ai[i+1] - diag[i] - 1;
876     tmp[i] *= *v++;
877     while (nz--) {
878       tmp[*vi++ ] -= (*v++)*tmp[i];
879     }
880   }
881 
882   /* backward solve the L^T */
883   for (i=n-1; i>=0; i--){
884     v   = aa + diag[i] - 1 ;
885     vi  = aj + diag[i] - 1 ;
886     nz  = diag[i] - ai[i];
887     while (nz--) {
888       tmp[*vi-- ] -= (*v--)*tmp[i];
889     }
890   }
891 
892   /* copy tmp into x according to permutation */
893   for (i=0; i<n; i++) x[r[i]] += tmp[i];
894 
895   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
896   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
897   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
898   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
899 
900   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 /* ----------------------------------------------------------------*/
904 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
905 
906 #undef __FUNCT__
907 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
908 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
909 {
910   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
911   IS                 isicol;
912   PetscErrorCode     ierr;
913   PetscInt           *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j;
914   PetscInt           *bi,*cols,nnz,*cols_lvl;
915   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
916   PetscInt           i,levels,diagonal_fill;
917   PetscTruth         col_identity,row_identity;
918   PetscReal          f;
919   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
920   PetscBT            lnkbt;
921   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
922   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
923   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
924 
925   PetscFunctionBegin;
926   f             = info->fill;
927   levels        = (PetscInt)info->levels;
928   diagonal_fill = (PetscInt)info->diagonal_fill;
929   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
930 
931   /* special case that simply copies fill pattern */
932   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
933   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
934   if (!levels && row_identity && col_identity) {
935     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
936     (*fact)->factor = FACTOR_LU;
937     b               = (Mat_SeqAIJ*)(*fact)->data;
938     if (!b->diag) {
939       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
940     }
941     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
942     b->row              = isrow;
943     b->col              = iscol;
944     b->icol             = isicol;
945     ierr                = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
946     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
947     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
948     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
949     PetscFunctionReturn(0);
950   }
951 
952   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
953   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
954 
955   /* get new row pointers */
956   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
957   bi[0] = 0;
958   /* bdiag is location of diagonal in factor */
959   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
960   bdiag[0]  = 0;
961 
962   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
963   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
964 
965   /* create a linked list for storing column indices of the active row */
966   nlnk = n + 1;
967   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
968 
969   /* initial FreeSpace size is f*(ai[n]+1) */
970   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
971   current_space = free_space;
972   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
973   current_space_lvl = free_space_lvl;
974 
975   for (i=0; i<n; i++) {
976     nzi = 0;
977     /* copy current row into linked list */
978     nnz  = ai[r[i]+1] - ai[r[i]];
979     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
980     cols = aj + ai[r[i]];
981     lnk[i] = -1; /* marker to indicate if diagonal exists */
982     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
983     nzi += nlnk;
984 
985     /* make sure diagonal entry is included */
986     if (diagonal_fill && lnk[i] == -1) {
987       fm = n;
988       while (lnk[fm] < i) fm = lnk[fm];
989       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
990       lnk[fm]    = i;
991       lnk_lvl[i] = 0;
992       nzi++; dcount++;
993     }
994 
995     /* add pivot rows into the active row */
996     nzbd = 0;
997     prow = lnk[n];
998     while (prow < i) {
999       nnz      = bdiag[prow];
1000       cols     = bj_ptr[prow] + nnz + 1;
1001       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1002       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1003       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1004       nzi += nlnk;
1005       prow = lnk[prow];
1006       nzbd++;
1007     }
1008     bdiag[i] = nzbd;
1009     bi[i+1]  = bi[i] + nzi;
1010 
1011     /* if free space is not available, make more free space */
1012     if (current_space->local_remaining<nzi) {
1013       nnz = nzi*(n - i); /* estimated and max additional space needed */
1014       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1015       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1016       reallocs++;
1017     }
1018 
1019     /* copy data into free_space and free_space_lvl, then initialize lnk */
1020     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1021     bj_ptr[i]    = current_space->array;
1022     bjlvl_ptr[i] = current_space_lvl->array;
1023 
1024     /* make sure the active row i has diagonal entry */
1025     if (*(bj_ptr[i]+bdiag[i]) != i) {
1026       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1027     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
1028     }
1029 
1030     current_space->array           += nzi;
1031     current_space->local_used      += nzi;
1032     current_space->local_remaining -= nzi;
1033     current_space_lvl->array           += nzi;
1034     current_space_lvl->local_used      += nzi;
1035     current_space_lvl->local_remaining -= nzi;
1036   }
1037 
1038   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1039   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1040 
1041   /* destroy list of free space and other temporary arrays */
1042   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1043   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1044   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1045   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1046   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1047 
1048 #if defined(PETSC_USE_INFO)
1049   {
1050     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1051     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1052     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1053     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1054     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1055     if (diagonal_fill) {
1056       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1057     }
1058   }
1059 #endif
1060 
1061   /* put together the new matrix */
1062   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
1063   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
1064   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1065   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1066   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1067   b = (Mat_SeqAIJ*)(*fact)->data;
1068   b->freedata     = PETSC_TRUE;
1069   b->singlemalloc = PETSC_FALSE;
1070   len = (bi[n] )*sizeof(PetscScalar);
1071   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1072   b->j          = bj;
1073   b->i          = bi;
1074   for (i=0; i<n; i++) bdiag[i] += bi[i];
1075   b->diag       = bdiag;
1076   b->ilen       = 0;
1077   b->imax       = 0;
1078   b->row        = isrow;
1079   b->col        = iscol;
1080   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1081   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1082   b->icol       = isicol;
1083   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1084   /* In b structure:  Free imax, ilen, old a, old j.
1085      Allocate bdiag, solve_work, new a, new j */
1086   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1087   b->maxnz             = b->nz = bi[n] ;
1088   (*fact)->factor = FACTOR_LU;
1089   (*fact)->info.factor_mallocs    = reallocs;
1090   (*fact)->info.fill_ratio_given  = f;
1091   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1092 
1093   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1094   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1095 
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #include "src/mat/impls/sbaij/seq/sbaij.h"
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1102 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1103 {
1104   Mat            C = *B;
1105   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1106   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1107   IS             ip=b->row;
1108   PetscErrorCode ierr;
1109   PetscInt       *rip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1110   PetscInt       *ai=a->i,*aj=a->j;
1111   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1112   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1113   PetscReal      zeropivot,rs,shiftnz;
1114   PetscReal      shiftpd;
1115   ChShift_Ctx    sctx;
1116   PetscInt       newshift;
1117 
1118   PetscFunctionBegin;
1119   shiftnz   = info->shiftnz;
1120   shiftpd   = info->shiftpd;
1121   zeropivot = info->zeropivot;
1122 
1123   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1124 
1125   /* initialization */
1126   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1127   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1128   jl   = il + mbs;
1129   rtmp = (MatScalar*)(jl + mbs);
1130 
1131   sctx.shift_amount = 0;
1132   sctx.nshift       = 0;
1133   do {
1134     sctx.chshift = PETSC_FALSE;
1135     for (i=0; i<mbs; i++) {
1136       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1137     }
1138 
1139     for (k = 0; k<mbs; k++){
1140       bval = ba + bi[k];
1141       /* initialize k-th row by the perm[k]-th row of A */
1142       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1143       for (j = jmin; j < jmax; j++){
1144         col = rip[aj[j]];
1145         if (col >= k){ /* only take upper triangular entry */
1146           rtmp[col] = aa[j];
1147           *bval++  = 0.0; /* for in-place factorization */
1148         }
1149       }
1150       /* shift the diagonal of the matrix */
1151       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1152 
1153       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1154       dk = rtmp[k];
1155       i = jl[k]; /* first row to be added to k_th row  */
1156 
1157       while (i < k){
1158         nexti = jl[i]; /* next row to be added to k_th row */
1159 
1160         /* compute multiplier, update diag(k) and U(i,k) */
1161         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1162         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1163         dk += uikdi*ba[ili];
1164         ba[ili] = uikdi; /* -U(i,k) */
1165 
1166         /* add multiple of row i to k-th row */
1167         jmin = ili + 1; jmax = bi[i+1];
1168         if (jmin < jmax){
1169           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1170           /* update il and jl for row i */
1171           il[i] = jmin;
1172           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1173         }
1174         i = nexti;
1175       }
1176 
1177       /* shift the diagonals when zero pivot is detected */
1178       /* compute rs=sum of abs(off-diagonal) */
1179       rs   = 0.0;
1180       jmin = bi[k]+1;
1181       nz   = bi[k+1] - jmin;
1182       if (nz){
1183         bcol = bj + jmin;
1184         while (nz--){
1185           rs += PetscAbsScalar(rtmp[*bcol]);
1186           bcol++;
1187         }
1188       }
1189 
1190       sctx.rs = rs;
1191       sctx.pv = dk;
1192       ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
1193       if (newshift == 1){
1194         break;    /* sctx.shift_amount is updated */
1195       } else if (newshift == -1){
1196         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %G tolerance %G * rs %G",k,PetscAbsScalar(dk),zeropivot,rs);
1197       }
1198 
1199       /* copy data into U(k,:) */
1200       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1201       jmin = bi[k]+1; jmax = bi[k+1];
1202       if (jmin < jmax) {
1203         for (j=jmin; j<jmax; j++){
1204           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1205         }
1206         /* add the k-th row into il and jl */
1207         il[k] = jmin;
1208         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1209       }
1210     }
1211   } while (sctx.chshift);
1212   ierr = PetscFree(il);CHKERRQ(ierr);
1213 
1214   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1215   C->factor       = FACTOR_CHOLESKY;
1216   C->assembled    = PETSC_TRUE;
1217   C->preallocated = PETSC_TRUE;
1218   ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr);
1219   if (sctx.nshift){
1220     if (shiftnz) {
1221       ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1222     } else if (shiftpd) {
1223       ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1224     }
1225   }
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1231 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1232 {
1233   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1234   Mat_SeqSBAIJ       *b;
1235   Mat                B;
1236   PetscErrorCode     ierr;
1237   PetscTruth         perm_identity;
1238   PetscInt           reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1239   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1240   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1241   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1242   PetscReal          fill=info->fill,levels=info->levels;
1243   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1244   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1245   PetscBT            lnkbt;
1246 
1247   PetscFunctionBegin;
1248   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1249   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1250 
1251   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1252   ui[0] = 0;
1253 
1254   /* special case that simply copies fill pattern */
1255   if (!levels && perm_identity) {
1256     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1257     for (i=0; i<am; i++) {
1258       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1259     }
1260     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1261     cols = uj;
1262     for (i=0; i<am; i++) {
1263       aj    = a->j + a->diag[i];
1264       ncols = ui[i+1] - ui[i];
1265       for (j=0; j<ncols; j++) *cols++ = *aj++;
1266     }
1267   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1268     /* initialization */
1269     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1270 
1271     /* jl: linked list for storing indices of the pivot rows
1272        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1273     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1274     il         = jl + am;
1275     uj_ptr     = (PetscInt**)(il + am);
1276     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1277     for (i=0; i<am; i++){
1278       jl[i] = am; il[i] = 0;
1279     }
1280 
1281     /* create and initialize a linked list for storing column indices of the active row k */
1282     nlnk = am + 1;
1283     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1284 
1285     /* initial FreeSpace size is fill*(ai[am]+1) */
1286     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1287     current_space = free_space;
1288     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1289     current_space_lvl = free_space_lvl;
1290 
1291     for (k=0; k<am; k++){  /* for each active row k */
1292       /* initialize lnk by the column indices of row rip[k] of A */
1293       nzk   = 0;
1294       ncols = ai[rip[k]+1] - ai[rip[k]];
1295       ncols_upper = 0;
1296       for (j=0; j<ncols; j++){
1297         i = *(aj + ai[rip[k]] + j);
1298         if (rip[i] >= k){ /* only take upper triangular entry */
1299           ajtmp[ncols_upper] = i;
1300           ncols_upper++;
1301         }
1302       }
1303       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1304       nzk += nlnk;
1305 
1306       /* update lnk by computing fill-in for each pivot row to be merged in */
1307       prow = jl[k]; /* 1st pivot row */
1308 
1309       while (prow < k){
1310         nextprow = jl[prow];
1311 
1312         /* merge prow into k-th row */
1313         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1314         jmax = ui[prow+1];
1315         ncols = jmax-jmin;
1316         i     = jmin - ui[prow];
1317         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1318         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1319         j     = *(uj - 1);
1320         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1321         nzk += nlnk;
1322 
1323         /* update il and jl for prow */
1324         if (jmin < jmax){
1325           il[prow] = jmin;
1326           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1327         }
1328         prow = nextprow;
1329       }
1330 
1331       /* if free space is not available, make more free space */
1332       if (current_space->local_remaining<nzk) {
1333         i = am - k + 1; /* num of unfactored rows */
1334         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1335         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1336         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1337         reallocs++;
1338       }
1339 
1340       /* copy data into free_space and free_space_lvl, then initialize lnk */
1341       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1342 
1343       /* add the k-th row into il and jl */
1344       if (nzk > 1){
1345         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1346         jl[k] = jl[i]; jl[i] = k;
1347         il[k] = ui[k] + 1;
1348       }
1349       uj_ptr[k]     = current_space->array;
1350       uj_lvl_ptr[k] = current_space_lvl->array;
1351 
1352       current_space->array           += nzk;
1353       current_space->local_used      += nzk;
1354       current_space->local_remaining -= nzk;
1355 
1356       current_space_lvl->array           += nzk;
1357       current_space_lvl->local_used      += nzk;
1358       current_space_lvl->local_remaining -= nzk;
1359 
1360       ui[k+1] = ui[k] + nzk;
1361     }
1362 
1363 #if defined(PETSC_USE_INFO)
1364     if (ai[am] != 0) {
1365       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1366       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1367       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1368       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1369     } else {
1370       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1371     }
1372 #endif
1373 
1374     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1375     ierr = PetscFree(jl);CHKERRQ(ierr);
1376     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1377 
1378     /* destroy list of free space and other temporary array(s) */
1379     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1380     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1381     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1382     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1383 
1384   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1385 
1386   /* put together the new matrix in MATSEQSBAIJ format */
1387   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1388   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1389   B = *fact;
1390   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1391   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1392 
1393   b    = (Mat_SeqSBAIJ*)B->data;
1394   b->singlemalloc = PETSC_FALSE;
1395   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1396   b->j    = uj;
1397   b->i    = ui;
1398   b->diag = 0;
1399   b->ilen = 0;
1400   b->imax = 0;
1401   b->row  = perm;
1402   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1403   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1404   b->icol = perm;
1405   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1406   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1407   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1408   b->maxnz = b->nz = ui[am];
1409 
1410   B->factor                 = FACTOR_CHOLESKY;
1411   B->info.factor_mallocs    = reallocs;
1412   B->info.fill_ratio_given  = fill;
1413   if (ai[am] != 0) {
1414     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1415   } else {
1416     B->info.fill_ratio_needed = 0.0;
1417   }
1418   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1419   if (perm_identity){
1420     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1421     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1422   }
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1428 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1429 {
1430   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1431   Mat_SeqSBAIJ       *b;
1432   Mat                B;
1433   PetscErrorCode     ierr;
1434   PetscTruth         perm_identity;
1435   PetscReal          fill = info->fill;
1436   PetscInt           *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1437   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1438   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1439   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1440   PetscBT            lnkbt;
1441   IS                 iperm;
1442 
1443   PetscFunctionBegin;
1444   /* check whether perm is the identity mapping */
1445   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1446   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1447 
1448   if (!perm_identity){
1449     /* check if perm is symmetric! */
1450     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1451     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1452     for (i=0; i<am; i++) {
1453       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1454     }
1455     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1456     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1457   }
1458 
1459   /* initialization */
1460   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1461   ui[0] = 0;
1462 
1463   /* jl: linked list for storing indices of the pivot rows
1464      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1465   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1466   il     = jl + am;
1467   cols   = il + am;
1468   ui_ptr = (PetscInt**)(cols + am);
1469   for (i=0; i<am; i++){
1470     jl[i] = am; il[i] = 0;
1471   }
1472 
1473   /* create and initialize a linked list for storing column indices of the active row k */
1474   nlnk = am + 1;
1475   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1476 
1477   /* initial FreeSpace size is fill*(ai[am]+1) */
1478   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1479   current_space = free_space;
1480 
1481   for (k=0; k<am; k++){  /* for each active row k */
1482     /* initialize lnk by the column indices of row rip[k] of A */
1483     nzk   = 0;
1484     ncols = ai[rip[k]+1] - ai[rip[k]];
1485     ncols_upper = 0;
1486     for (j=0; j<ncols; j++){
1487       i = rip[*(aj + ai[rip[k]] + j)];
1488       if (i >= k){ /* only take upper triangular entry */
1489         cols[ncols_upper] = i;
1490         ncols_upper++;
1491       }
1492     }
1493     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1494     nzk += nlnk;
1495 
1496     /* update lnk by computing fill-in for each pivot row to be merged in */
1497     prow = jl[k]; /* 1st pivot row */
1498 
1499     while (prow < k){
1500       nextprow = jl[prow];
1501       /* merge prow into k-th row */
1502       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1503       jmax = ui[prow+1];
1504       ncols = jmax-jmin;
1505       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1506       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1507       nzk += nlnk;
1508 
1509       /* update il and jl for prow */
1510       if (jmin < jmax){
1511         il[prow] = jmin;
1512         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1513       }
1514       prow = nextprow;
1515     }
1516 
1517     /* if free space is not available, make more free space */
1518     if (current_space->local_remaining<nzk) {
1519       i = am - k + 1; /* num of unfactored rows */
1520       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1521       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1522       reallocs++;
1523     }
1524 
1525     /* copy data into free space, then initialize lnk */
1526     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1527 
1528     /* add the k-th row into il and jl */
1529     if (nzk-1 > 0){
1530       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1531       jl[k] = jl[i]; jl[i] = k;
1532       il[k] = ui[k] + 1;
1533     }
1534     ui_ptr[k] = current_space->array;
1535     current_space->array           += nzk;
1536     current_space->local_used      += nzk;
1537     current_space->local_remaining -= nzk;
1538 
1539     ui[k+1] = ui[k] + nzk;
1540   }
1541 
1542 #if defined(PETSC_USE_INFO)
1543   if (ai[am] != 0) {
1544     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1545     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1546     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1547     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1548   } else {
1549      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1550   }
1551 #endif
1552 
1553   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1554   ierr = PetscFree(jl);CHKERRQ(ierr);
1555 
1556   /* destroy list of free space and other temporary array(s) */
1557   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1558   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1559   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1560 
1561   /* put together the new matrix in MATSEQSBAIJ format */
1562   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1563   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1564   B    = *fact;
1565   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1566   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1567 
1568   b = (Mat_SeqSBAIJ*)B->data;
1569   b->singlemalloc = PETSC_FALSE;
1570   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1571   b->j    = uj;
1572   b->i    = ui;
1573   b->diag = 0;
1574   b->ilen = 0;
1575   b->imax = 0;
1576   b->row  = perm;
1577   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1578   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1579   b->icol = perm;
1580   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1581   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1582   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1583   b->maxnz = b->nz = ui[am];
1584 
1585   B->factor                 = FACTOR_CHOLESKY;
1586   B->info.factor_mallocs    = reallocs;
1587   B->info.fill_ratio_given  = fill;
1588   if (ai[am] != 0) {
1589     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1590   } else {
1591     B->info.fill_ratio_needed = 0.0;
1592   }
1593   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1594   if (perm_identity){
1595     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1596     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1597   }
1598   PetscFunctionReturn(0);
1599 }
1600