xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision dbbb2e4e304796ec221bf0d5ec5bdf6f874ca77c)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   PetscInt       *c,*r,*ic,i,n = A->rmap.n;
64   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65   PetscInt       *ordcol,*iwk,*iperm,*jw;
66   PetscInt       jmax,lfill,job,*o_i,*o_j;
67   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
68   PetscReal      af;
69 
70   PetscFunctionBegin;
71 
72   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
73   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
74   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
75   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
76   lfill   = (PetscInt)(info->dtcount/2.0);
77   jmax    = (PetscInt)(info->fill*a->nz);
78 
79 
80   /* ------------------------------------------------------------
81      If reorder=.TRUE., then the original matrix has to be
82      reordered to reflect the user selected ordering scheme, and
83      then de-reordered so it is in it's original format.
84      Because Saad's dperm() is NOT in place, we have to copy
85      the original matrix and allocate more storage. . .
86      ------------------------------------------------------------
87   */
88 
89   /* set reorder to true if either isrow or iscol is not identity */
90   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
91   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
92   reorder = PetscNot(reorder);
93 
94 
95   /* storage for ilu factor */
96   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
99   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
100 
101   /* ------------------------------------------------------------
102      Make sure that everything is Fortran formatted (1-Based)
103      ------------------------------------------------------------
104   */
105   for (i=old_i[0];i<old_i[n];i++) {
106     old_j[i]++;
107   }
108   for(i=0;i<n+1;i++) {
109     old_i[i]++;
110   };
111 
112 
113   if (reorder) {
114     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
115     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
116     for(i=0;i<n;i++) {
117       r[i]  = r[i]+1;
118       c[i]  = c[i]+1;
119     }
120     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
122     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
123     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124     for (i=0;i<n;i++) {
125       r[i]  = r[i]-1;
126       c[i]  = c[i]-1;
127     }
128     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
129     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
130     o_a = old_a2;
131     o_j = old_j2;
132     o_i = old_i2;
133   } else {
134     o_a = old_a;
135     o_j = old_j;
136     o_i = old_i;
137   }
138 
139   /* ------------------------------------------------------------
140      Call Saad's ilutp() routine to generate the factorization
141      ------------------------------------------------------------
142   */
143 
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
145   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
146   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
147 
148   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149   if (sierr) {
150     switch (sierr) {
151       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
152       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
153       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157     }
158   }
159 
160   ierr = PetscFree(w);CHKERRQ(ierr);
161   ierr = PetscFree(jw);CHKERRQ(ierr);
162 
163   /* ------------------------------------------------------------
164      Saad's routine gives the result in Modified Sparse Row (msr)
165      Convert to Compressed Sparse Row format (csr)
166      ------------------------------------------------------------
167   */
168 
169   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
170   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
171 
172   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
173 
174   ierr = PetscFree(iwk);CHKERRQ(ierr);
175   ierr = PetscFree(wk);CHKERRQ(ierr);
176 
177   if (reorder) {
178     ierr = PetscFree(old_a2);CHKERRQ(ierr);
179     ierr = PetscFree(old_j2);CHKERRQ(ierr);
180     ierr = PetscFree(old_i2);CHKERRQ(ierr);
181   } else {
182     /* fix permutation of old_j that the factorization introduced */
183     for (i=old_i[0]; i<old_i[n]; i++) {
184       old_j[i-1] = iperm[old_j[i-1]-1];
185     }
186   }
187 
188   /* get rid of the shift to indices starting at 1 */
189   for (i=0; i<n+1; i++) {
190     old_i[i]--;
191   }
192   for (i=old_i[0];i<old_i[n];i++) {
193     old_j[i]--;
194   }
195 
196   /* Make the factored matrix 0-based */
197   for (i=0; i<n+1; i++) {
198     new_i[i]--;
199   }
200   for (i=new_i[0];i<new_i[n];i++) {
201     new_j[i]--;
202   }
203 
204   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205   /*-- permute the right-hand-side and solution vectors           --*/
206   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
207   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
208   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
209   for(i=0; i<n; i++) {
210     ordcol[i] = ic[iperm[i]-1];
211   };
212   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
213   ierr = ISDestroy(isicol);CHKERRQ(ierr);
214 
215   ierr = PetscFree(iperm);CHKERRQ(ierr);
216 
217   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
218   ierr = PetscFree(ordcol);CHKERRQ(ierr);
219 
220   /*----- put together the new matrix -----*/
221 
222   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
223   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
224   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
225   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
226   (*fact)->factor    = FACTOR_LU;
227   (*fact)->assembled = PETSC_TRUE;
228 
229   b = (Mat_SeqAIJ*)(*fact)->data;
230   b->free_a        = PETSC_TRUE;
231   b->free_ij       = PETSC_TRUE;
232   b->sorted        = PETSC_FALSE;
233   b->singlemalloc  = PETSC_FALSE;
234   b->a             = new_a;
235   b->j             = new_j;
236   b->i             = new_i;
237   b->ilen          = 0;
238   b->imax          = 0;
239   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
240   b->row           = isirow;
241   b->col           = iscolf;
242   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
243   b->maxnz = b->nz = new_i[n];
244   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
245   (*fact)->info.factor_mallocs = 0;
246 
247   af = ((double)b->nz)/((double)a->nz) + .001;
248   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr);
249   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
250   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
251   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
252 
253   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
254 
255   PetscFunctionReturn(0);
256 #endif
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
261 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
262 {
263   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
264   IS                 isicol;
265   PetscErrorCode     ierr;
266   PetscInt           *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
267   PetscInt           *bi,*bj,*ajtmp;
268   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
269   PetscReal          f;
270   PetscInt           nlnk,*lnk,k,**bi_ptr;
271   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
272   PetscBT            lnkbt;
273 
274   PetscFunctionBegin;
275   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
276   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
277   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
278   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
279 
280   /* get new row pointers */
281   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
282   bi[0] = 0;
283 
284   /* bdiag is location of diagonal in factor */
285   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
286   bdiag[0] = 0;
287 
288   /* linked list for storing column indices of the active row */
289   nlnk = n + 1;
290   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
291 
292   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
293 
294   /* initial FreeSpace size is f*(ai[n]+1) */
295   f = info->fill;
296   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
297   current_space = free_space;
298 
299   for (i=0; i<n; i++) {
300     /* copy previous fill into linked list */
301     nzi = 0;
302     nnz = ai[r[i]+1] - ai[r[i]];
303     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
304     ajtmp = aj + ai[r[i]];
305     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
306     nzi += nlnk;
307 
308     /* add pivot rows into linked list */
309     row = lnk[n];
310     while (row < i) {
311       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
312       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
313       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
314       nzi += nlnk;
315       row  = lnk[row];
316     }
317     bi[i+1] = bi[i] + nzi;
318     im[i]   = nzi;
319 
320     /* mark bdiag */
321     nzbd = 0;
322     nnz  = nzi;
323     k    = lnk[n];
324     while (nnz-- && k < i){
325       nzbd++;
326       k = lnk[k];
327     }
328     bdiag[i] = bi[i] + nzbd;
329 
330     /* if free space is not available, make more free space */
331     if (current_space->local_remaining<nzi) {
332       nnz = (n - i)*nzi; /* estimated and max additional space needed */
333       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
334       reallocs++;
335     }
336 
337     /* copy data into free space, then initialize lnk */
338     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
339     bi_ptr[i] = current_space->array;
340     current_space->array           += nzi;
341     current_space->local_used      += nzi;
342     current_space->local_remaining -= nzi;
343   }
344 #if defined(PETSC_USE_INFO)
345   if (ai[n] != 0) {
346     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
347     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
348     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
349     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
350     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
351   } else {
352     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
353   }
354 #endif
355 
356   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
357   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
358 
359   /* destroy list of free space and other temporary array(s) */
360   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
361   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
362   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
363   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
364 
365   /* put together the new matrix */
366   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
367   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
368   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
369   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
370   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
371   b    = (Mat_SeqAIJ*)(*B)->data;
372   b->free_a       = PETSC_TRUE;
373   b->free_ij      = PETSC_TRUE;
374   b->singlemalloc = PETSC_FALSE;
375   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
376   b->j          = bj;
377   b->i          = bi;
378   b->diag       = bdiag;
379   b->ilen       = 0;
380   b->imax       = 0;
381   b->row        = isrow;
382   b->col        = iscol;
383   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
384   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
385   b->icol       = isicol;
386   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
387 
388   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
389   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
390   b->maxnz = b->nz = bi[n] ;
391 
392   (*B)->factor                 =  FACTOR_LU;
393   (*B)->info.factor_mallocs    = reallocs;
394   (*B)->info.fill_ratio_given  = f;
395 
396   if (ai[n] != 0) {
397     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
398   } else {
399     (*B)->info.fill_ratio_needed = 0.0;
400   }
401   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
402   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
403   PetscFunctionReturn(0);
404 }
405 
406 /*
407     Trouble in factorization, should we dump the original matrix?
408 */
409 #undef __FUNCT__
410 #define __FUNCT__ "MatFactorDumpMatrix"
411 PetscErrorCode MatFactorDumpMatrix(Mat A)
412 {
413   PetscErrorCode ierr;
414   PetscTruth     flg;
415 
416   PetscFunctionBegin;
417   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr);
418   if (flg) {
419     PetscViewer viewer;
420     char        filename[PETSC_MAX_PATH_LEN];
421 
422     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
423     ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
424     ierr = MatView(A,viewer);CHKERRQ(ierr);
425     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
426   }
427   PetscFunctionReturn(0);
428 }
429 
430 /* ----------------------------------------------------------- */
431 #undef __FUNCT__
432 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
433 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
434 {
435   Mat            C=*B;
436   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
437   IS             isrow = b->row,isicol = b->icol;
438   PetscErrorCode ierr;
439   PetscInt       *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
440   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
441   PetscInt       *diag_offset = b->diag,diag,*pj;
442   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
443   PetscScalar    d;
444   PetscReal      rs;
445   LUShift_Ctx    sctx;
446   PetscInt       newshift,*ddiag;
447 
448   PetscFunctionBegin;
449   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
450   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
451   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
452   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
453   rtmps = rtmp; ics = ic;
454 
455   sctx.shift_top  = 0;
456   sctx.nshift_max = 0;
457   sctx.shift_lo   = 0;
458   sctx.shift_hi   = 0;
459 
460   /* if both shift schemes are chosen by user, only use info->shiftpd */
461   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
462   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
463     PetscInt *aai = a->i;
464     ddiag          = a->diag;
465     sctx.shift_top = 0;
466     for (i=0; i<n; i++) {
467       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
468       d  = (a->a)[ddiag[i]];
469       rs = -PetscAbsScalar(d) - PetscRealPart(d);
470       v  = a->a+aai[i];
471       nz = aai[i+1] - aai[i];
472       for (j=0; j<nz; j++)
473 	rs += PetscAbsScalar(v[j]);
474       if (rs>sctx.shift_top) sctx.shift_top = rs;
475     }
476     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
477     sctx.shift_top    *= 1.1;
478     sctx.nshift_max   = 5;
479     sctx.shift_lo     = 0.;
480     sctx.shift_hi     = 1.;
481   }
482 
483   sctx.shift_amount = 0;
484   sctx.nshift       = 0;
485   do {
486     sctx.lushift = PETSC_FALSE;
487     for (i=0; i<n; i++){
488       nz    = bi[i+1] - bi[i];
489       bjtmp = bj + bi[i];
490       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
491 
492       /* load in initial (unfactored row) */
493       nz    = a->i[r[i]+1] - a->i[r[i]];
494       ajtmp = a->j + a->i[r[i]];
495       v     = a->a + a->i[r[i]];
496       for (j=0; j<nz; j++) {
497         rtmp[ics[ajtmp[j]]] = v[j];
498       }
499       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
500 
501       row = *bjtmp++;
502       while  (row < i) {
503         pc = rtmp + row;
504         if (*pc != 0.0) {
505           pv         = b->a + diag_offset[row];
506           pj         = b->j + diag_offset[row] + 1;
507           multiplier = *pc / *pv++;
508           *pc        = multiplier;
509           nz         = bi[row+1] - diag_offset[row] - 1;
510           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
511           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
512         }
513         row = *bjtmp++;
514       }
515       /* finished row so stick it into b->a */
516       pv   = b->a + bi[i] ;
517       pj   = b->j + bi[i] ;
518       nz   = bi[i+1] - bi[i];
519       diag = diag_offset[i] - bi[i];
520       rs   = 0.0;
521       for (j=0; j<nz; j++) {
522         pv[j] = rtmps[pj[j]];
523         if (j != diag) rs += PetscAbsScalar(pv[j]);
524       }
525 
526       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
527       sctx.rs  = rs;
528       sctx.pv  = pv[diag];
529       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
530       if (newshift == 1) break;
531     }
532 
533     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
534       /*
535        * if no shift in this attempt & shifting & started shifting & can refine,
536        * then try lower shift
537        */
538       sctx.shift_hi        = info->shift_fraction;
539       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
540       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
541       sctx.lushift         = PETSC_TRUE;
542       sctx.nshift++;
543     }
544   } while (sctx.lushift);
545 
546   /* invert diagonal entries for simplier triangular solves */
547   for (i=0; i<n; i++) {
548     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
549   }
550 
551   ierr = PetscFree(rtmp);CHKERRQ(ierr);
552   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
553   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
554   C->factor = FACTOR_LU;
555   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
556   C->assembled = PETSC_TRUE;
557   ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr);
558   if (sctx.nshift){
559     if (info->shiftnz) {
560       ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
561     } else if (info->shiftpd) {
562       ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
563     }
564   }
565   PetscFunctionReturn(0);
566 }
567 
568 /*
569    This routine implements inplace ILU(0) with row or/and column permutations.
570    Input:
571      A - original matrix
572    Output;
573      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
574          a->j (col index) is permuted by the inverse of colperm, then sorted
575          a->a reordered accordingly with a->j
576          a->diag (ptr to diagonal elements) is updated.
577 */
578 #undef __FUNCT__
579 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
580 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B)
581 {
582   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
583   IS             isrow = a->row,isicol = a->icol;
584   PetscErrorCode ierr;
585   PetscInt       *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j;
586   PetscInt       *ajtmp,nz,row,*ics;
587   PetscInt       *diag = a->diag,nbdiag,*pj;
588   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,d;
589   PetscReal      rs;
590   LUShift_Ctx    sctx;
591   PetscInt       newshift;
592 
593   PetscFunctionBegin;
594   if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
595   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
596   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
597   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
598   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
599   ics = ic;
600 
601   sctx.shift_top  = 0;
602   sctx.nshift_max = 0;
603   sctx.shift_lo   = 0;
604   sctx.shift_hi   = 0;
605 
606   /* if both shift schemes are chosen by user, only use info->shiftpd */
607   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
608   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
609     sctx.shift_top = 0;
610     for (i=0; i<n; i++) {
611       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
612       d  = (a->a)[diag[i]];
613       rs = -PetscAbsScalar(d) - PetscRealPart(d);
614       v  = a->a+ai[i];
615       nz = ai[i+1] - ai[i];
616       for (j=0; j<nz; j++)
617 	rs += PetscAbsScalar(v[j]);
618       if (rs>sctx.shift_top) sctx.shift_top = rs;
619     }
620     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
621     sctx.shift_top    *= 1.1;
622     sctx.nshift_max   = 5;
623     sctx.shift_lo     = 0.;
624     sctx.shift_hi     = 1.;
625   }
626 
627   sctx.shift_amount = 0;
628   sctx.nshift       = 0;
629   do {
630     sctx.lushift = PETSC_FALSE;
631     for (i=0; i<n; i++){
632       /* load in initial unfactored row */
633       nz    = ai[r[i]+1] - ai[r[i]];
634       ajtmp = aj + ai[r[i]];
635       v     = a->a + ai[r[i]];
636       /* sort permuted ajtmp and values v accordingly */
637       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
638       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
639 
640       diag[r[i]] = ai[r[i]];
641       for (j=0; j<nz; j++) {
642         rtmp[ajtmp[j]] = v[j];
643         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
644       }
645       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
646 
647       row = *ajtmp++;
648       while  (row < i) {
649         pc = rtmp + row;
650         if (*pc != 0.0) {
651           pv         = a->a + diag[r[row]];
652           pj         = aj + diag[r[row]] + 1;
653 
654           multiplier = *pc / *pv++;
655           *pc        = multiplier;
656           nz         = ai[r[row]+1] - diag[r[row]] - 1;
657           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
658           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
659         }
660         row = *ajtmp++;
661       }
662       /* finished row so overwrite it onto a->a */
663       pv   = a->a + ai[r[i]] ;
664       pj   = aj + ai[r[i]] ;
665       nz   = ai[r[i]+1] - ai[r[i]];
666       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
667 
668       rs   = 0.0;
669       for (j=0; j<nz; j++) {
670         pv[j] = rtmp[pj[j]];
671         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
672       }
673 
674       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
675       sctx.rs  = rs;
676       sctx.pv  = pv[nbdiag];
677       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
678       if (newshift == 1) break;
679     }
680 
681     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
682       /*
683        * if no shift in this attempt & shifting & started shifting & can refine,
684        * then try lower shift
685        */
686       sctx.shift_hi        = info->shift_fraction;
687       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
688       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
689       sctx.lushift         = PETSC_TRUE;
690       sctx.nshift++;
691     }
692   } while (sctx.lushift);
693 
694   /* invert diagonal entries for simplier triangular solves */
695   for (i=0; i<n; i++) {
696     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
697   }
698 
699   ierr = PetscFree(rtmp);CHKERRQ(ierr);
700   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
701   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
702   A->factor     = FACTOR_LU;
703   A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
704   A->assembled = PETSC_TRUE;
705   ierr = PetscLogFlops(A->cmap.n);CHKERRQ(ierr);
706   if (sctx.nshift){
707     if (info->shiftnz) {
708       ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
709     } else if (info->shiftpd) {
710       ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
711     }
712   }
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
718 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
719 {
720   PetscFunctionBegin;
721   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
722   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
723   PetscFunctionReturn(0);
724 }
725 
726 
727 /* ----------------------------------------------------------- */
728 #undef __FUNCT__
729 #define __FUNCT__ "MatLUFactor_SeqAIJ"
730 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
731 {
732   PetscErrorCode ierr;
733   Mat            C;
734 
735   PetscFunctionBegin;
736   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
737   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
738   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
739   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 /* ----------------------------------------------------------- */
743 #undef __FUNCT__
744 #define __FUNCT__ "MatSolve_SeqAIJ"
745 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
746 {
747   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
748   IS             iscol = a->col,isrow = a->row;
749   PetscErrorCode ierr;
750   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
751   PetscInt       nz,*rout,*cout;
752   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
753 
754   PetscFunctionBegin;
755   if (!n) PetscFunctionReturn(0);
756 
757   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
758   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
759   tmp  = a->solve_work;
760 
761   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
762   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
763 
764   /* forward solve the lower triangular */
765   tmp[0] = b[*r++];
766   tmps   = tmp;
767   for (i=1; i<n; i++) {
768     v   = aa + ai[i] ;
769     vi  = aj + ai[i] ;
770     nz  = a->diag[i] - ai[i];
771     sum = b[*r++];
772     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
773     tmp[i] = sum;
774   }
775 
776   /* backward solve the upper triangular */
777   for (i=n-1; i>=0; i--){
778     v   = aa + a->diag[i] + 1;
779     vi  = aj + a->diag[i] + 1;
780     nz  = ai[i+1] - a->diag[i] - 1;
781     sum = tmp[i];
782     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
783     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
784   }
785 
786   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
787   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
788   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
789   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
790   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
791   PetscFunctionReturn(0);
792 }
793 
794 #undef __FUNCT__
795 #define __FUNCT__ "MatMatSolve_SeqAIJ"
796 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
797 {
798   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
799   IS             iscol = a->col,isrow = a->row;
800   PetscErrorCode ierr;
801   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
802   PetscInt       nz,*rout,*cout,neq;
803   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
804 
805   PetscFunctionBegin;
806   if (!n) PetscFunctionReturn(0);
807 
808   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
809   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
810 
811   tmp  = a->solve_work;
812   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
813   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
814 
815   for (neq=0; neq<n; neq++){
816     /* forward solve the lower triangular */
817     tmp[0] = b[r[0]];
818     tmps   = tmp;
819     for (i=1; i<n; i++) {
820       v   = aa + ai[i] ;
821       vi  = aj + ai[i] ;
822       nz  = a->diag[i] - ai[i];
823       sum = b[r[i]];
824       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
825       tmp[i] = sum;
826     }
827     /* backward solve the upper triangular */
828     for (i=n-1; i>=0; i--){
829       v   = aa + a->diag[i] + 1;
830       vi  = aj + a->diag[i] + 1;
831       nz  = ai[i+1] - a->diag[i] - 1;
832       sum = tmp[i];
833       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
834       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
835     }
836 
837     b += n;
838     x += n;
839   }
840   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
841   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
842   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
843   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
844   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
850 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
851 {
852   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
853   IS             iscol = a->col,isrow = a->row;
854   PetscErrorCode ierr;
855   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
856   PetscInt       nz,*rout,*cout,row;
857   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
858 
859   PetscFunctionBegin;
860   if (!n) PetscFunctionReturn(0);
861 
862   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
863   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
864   tmp  = a->solve_work;
865 
866   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
867   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
868 
869   /* forward solve the lower triangular */
870   tmp[0] = b[*r++];
871   tmps   = tmp;
872   for (row=1; row<n; row++) {
873     i   = rout[row]; /* permuted row */
874     v   = aa + ai[i] ;
875     vi  = aj + ai[i] ;
876     nz  = a->diag[i] - ai[i];
877     sum = b[*r++];
878     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
879     tmp[row] = sum;
880   }
881 
882   /* backward solve the upper triangular */
883   for (row=n-1; row>=0; row--){
884     i   = rout[row]; /* permuted row */
885     v   = aa + a->diag[i] + 1;
886     vi  = aj + a->diag[i] + 1;
887     nz  = ai[i+1] - a->diag[i] - 1;
888     sum = tmp[row];
889     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
890     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
891   }
892 
893   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
894   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
895   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
896   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
897   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
898   PetscFunctionReturn(0);
899 }
900 
901 /* ----------------------------------------------------------- */
902 #undef __FUNCT__
903 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
904 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
905 {
906   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
907   PetscErrorCode ierr;
908   PetscInt       n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
909   PetscScalar    *x,*b,*aa = a->a;
910 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
911   PetscInt       adiag_i,i,*vi,nz,ai_i;
912   PetscScalar    *v,sum;
913 #endif
914 
915   PetscFunctionBegin;
916   if (!n) PetscFunctionReturn(0);
917 
918   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
920 
921 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
922   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
923 #else
924   /* forward solve the lower triangular */
925   x[0] = b[0];
926   for (i=1; i<n; i++) {
927     ai_i = ai[i];
928     v    = aa + ai_i;
929     vi   = aj + ai_i;
930     nz   = adiag[i] - ai_i;
931     sum  = b[i];
932     while (nz--) sum -= *v++ * x[*vi++];
933     x[i] = sum;
934   }
935 
936   /* backward solve the upper triangular */
937   for (i=n-1; i>=0; i--){
938     adiag_i = adiag[i];
939     v       = aa + adiag_i + 1;
940     vi      = aj + adiag_i + 1;
941     nz      = ai[i+1] - adiag_i - 1;
942     sum     = x[i];
943     while (nz--) sum -= *v++ * x[*vi++];
944     x[i]    = sum*aa[adiag_i];
945   }
946 #endif
947   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
948   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
949   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
950   PetscFunctionReturn(0);
951 }
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
955 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
956 {
957   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
958   IS             iscol = a->col,isrow = a->row;
959   PetscErrorCode ierr;
960   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
961   PetscInt       nz,*rout,*cout;
962   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
963 
964   PetscFunctionBegin;
965   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
966 
967   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
968   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
969   tmp  = a->solve_work;
970 
971   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
972   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
973 
974   /* forward solve the lower triangular */
975   tmp[0] = b[*r++];
976   for (i=1; i<n; i++) {
977     v   = aa + ai[i] ;
978     vi  = aj + ai[i] ;
979     nz  = a->diag[i] - ai[i];
980     sum = b[*r++];
981     while (nz--) sum -= *v++ * tmp[*vi++ ];
982     tmp[i] = sum;
983   }
984 
985   /* backward solve the upper triangular */
986   for (i=n-1; i>=0; i--){
987     v   = aa + a->diag[i] + 1;
988     vi  = aj + a->diag[i] + 1;
989     nz  = ai[i+1] - a->diag[i] - 1;
990     sum = tmp[i];
991     while (nz--) sum -= *v++ * tmp[*vi++ ];
992     tmp[i] = sum*aa[a->diag[i]];
993     x[*c--] += tmp[i];
994   }
995 
996   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
997   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
998   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
999   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1000   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1001 
1002   PetscFunctionReturn(0);
1003 }
1004 /* -------------------------------------------------------------------*/
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1007 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1008 {
1009   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1010   IS             iscol = a->col,isrow = a->row;
1011   PetscErrorCode ierr;
1012   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1013   PetscInt       nz,*rout,*cout,*diag = a->diag;
1014   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
1015 
1016   PetscFunctionBegin;
1017   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1018   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1019   tmp  = a->solve_work;
1020 
1021   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1022   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1023 
1024   /* copy the b into temp work space according to permutation */
1025   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1026 
1027   /* forward solve the U^T */
1028   for (i=0; i<n; i++) {
1029     v   = aa + diag[i] ;
1030     vi  = aj + diag[i] + 1;
1031     nz  = ai[i+1] - diag[i] - 1;
1032     s1  = tmp[i];
1033     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1034     while (nz--) {
1035       tmp[*vi++ ] -= (*v++)*s1;
1036     }
1037     tmp[i] = s1;
1038   }
1039 
1040   /* backward solve the L^T */
1041   for (i=n-1; i>=0; i--){
1042     v   = aa + diag[i] - 1 ;
1043     vi  = aj + diag[i] - 1 ;
1044     nz  = diag[i] - ai[i];
1045     s1  = tmp[i];
1046     while (nz--) {
1047       tmp[*vi-- ] -= (*v--)*s1;
1048     }
1049   }
1050 
1051   /* copy tmp into x according to permutation */
1052   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1053 
1054   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1055   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1056   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1057   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1058 
1059   ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr);
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1065 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1066 {
1067   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1068   IS             iscol = a->col,isrow = a->row;
1069   PetscErrorCode ierr;
1070   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1071   PetscInt       nz,*rout,*cout,*diag = a->diag;
1072   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
1073 
1074   PetscFunctionBegin;
1075   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1076 
1077   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1078   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1079   tmp = a->solve_work;
1080 
1081   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1082   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1083 
1084   /* copy the b into temp work space according to permutation */
1085   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1086 
1087   /* forward solve the U^T */
1088   for (i=0; i<n; i++) {
1089     v   = aa + diag[i] ;
1090     vi  = aj + diag[i] + 1;
1091     nz  = ai[i+1] - diag[i] - 1;
1092     tmp[i] *= *v++;
1093     while (nz--) {
1094       tmp[*vi++ ] -= (*v++)*tmp[i];
1095     }
1096   }
1097 
1098   /* backward solve the L^T */
1099   for (i=n-1; i>=0; i--){
1100     v   = aa + diag[i] - 1 ;
1101     vi  = aj + diag[i] - 1 ;
1102     nz  = diag[i] - ai[i];
1103     while (nz--) {
1104       tmp[*vi-- ] -= (*v--)*tmp[i];
1105     }
1106   }
1107 
1108   /* copy tmp into x according to permutation */
1109   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1110 
1111   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1112   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1113   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1114   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1115 
1116   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 /* ----------------------------------------------------------------*/
1120 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat,PetscTruth*,PetscInt*);
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1124 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
1125 {
1126   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1127   IS                 isicol;
1128   PetscErrorCode     ierr;
1129   PetscInt           *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d;
1130   PetscInt           *bi,*cols,nnz,*cols_lvl;
1131   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1132   PetscInt           i,levels,diagonal_fill;
1133   PetscTruth         col_identity,row_identity;
1134   PetscReal          f;
1135   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1136   PetscBT            lnkbt;
1137   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1138   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1139   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1140   PetscTruth         missing;
1141 
1142   PetscFunctionBegin;
1143   f             = info->fill;
1144   levels        = (PetscInt)info->levels;
1145   diagonal_fill = (PetscInt)info->diagonal_fill;
1146   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1147 
1148   /* special case that simply copies fill pattern */
1149   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1150   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1151   if (!levels && row_identity && col_identity) {
1152     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
1153     (*fact)->factor                 = FACTOR_LU;
1154     (*fact)->info.factor_mallocs    = 0;
1155     (*fact)->info.fill_ratio_given  = info->fill;
1156     (*fact)->info.fill_ratio_needed = 1.0;
1157     b               = (Mat_SeqAIJ*)(*fact)->data;
1158     ierr = MatMissingDiagonal_SeqAIJ(*fact,&missing,&d);CHKERRQ(ierr);
1159     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1160     b->row              = isrow;
1161     b->col              = iscol;
1162     b->icol             = isicol;
1163     ierr                = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1164     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
1165     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1166     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1167     PetscFunctionReturn(0);
1168   }
1169 
1170   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1171   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1172 
1173   /* get new row pointers */
1174   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1175   bi[0] = 0;
1176   /* bdiag is location of diagonal in factor */
1177   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1178   bdiag[0]  = 0;
1179 
1180   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1181   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1182 
1183   /* create a linked list for storing column indices of the active row */
1184   nlnk = n + 1;
1185   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1186 
1187   /* initial FreeSpace size is f*(ai[n]+1) */
1188   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1189   current_space = free_space;
1190   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1191   current_space_lvl = free_space_lvl;
1192 
1193   for (i=0; i<n; i++) {
1194     nzi = 0;
1195     /* copy current row into linked list */
1196     nnz  = ai[r[i]+1] - ai[r[i]];
1197     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1198     cols = aj + ai[r[i]];
1199     lnk[i] = -1; /* marker to indicate if diagonal exists */
1200     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1201     nzi += nlnk;
1202 
1203     /* make sure diagonal entry is included */
1204     if (diagonal_fill && lnk[i] == -1) {
1205       fm = n;
1206       while (lnk[fm] < i) fm = lnk[fm];
1207       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1208       lnk[fm]    = i;
1209       lnk_lvl[i] = 0;
1210       nzi++; dcount++;
1211     }
1212 
1213     /* add pivot rows into the active row */
1214     nzbd = 0;
1215     prow = lnk[n];
1216     while (prow < i) {
1217       nnz      = bdiag[prow];
1218       cols     = bj_ptr[prow] + nnz + 1;
1219       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1220       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1221       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1222       nzi += nlnk;
1223       prow = lnk[prow];
1224       nzbd++;
1225     }
1226     bdiag[i] = nzbd;
1227     bi[i+1]  = bi[i] + nzi;
1228 
1229     /* if free space is not available, make more free space */
1230     if (current_space->local_remaining<nzi) {
1231       nnz = nzi*(n - i); /* estimated and max additional space needed */
1232       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1233       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1234       reallocs++;
1235     }
1236 
1237     /* copy data into free_space and free_space_lvl, then initialize lnk */
1238     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1239     bj_ptr[i]    = current_space->array;
1240     bjlvl_ptr[i] = current_space_lvl->array;
1241 
1242     /* make sure the active row i has diagonal entry */
1243     if (*(bj_ptr[i]+bdiag[i]) != i) {
1244       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1245     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1246     }
1247 
1248     current_space->array           += nzi;
1249     current_space->local_used      += nzi;
1250     current_space->local_remaining -= nzi;
1251     current_space_lvl->array           += nzi;
1252     current_space_lvl->local_used      += nzi;
1253     current_space_lvl->local_remaining -= nzi;
1254   }
1255 
1256   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1257   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1258 
1259   /* destroy list of free space and other temporary arrays */
1260   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1261   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1262   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1263   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1264   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1265 
1266 #if defined(PETSC_USE_INFO)
1267   {
1268     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1269     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1270     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1271     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1272     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1273     if (diagonal_fill) {
1274       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1275     }
1276   }
1277 #endif
1278 
1279   /* put together the new matrix */
1280   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
1281   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
1282   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1283   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1285   b = (Mat_SeqAIJ*)(*fact)->data;
1286   b->free_a       = PETSC_TRUE;
1287   b->free_ij      = PETSC_TRUE;
1288   b->singlemalloc = PETSC_FALSE;
1289   len = (bi[n] )*sizeof(PetscScalar);
1290   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1291   b->j          = bj;
1292   b->i          = bi;
1293   for (i=0; i<n; i++) bdiag[i] += bi[i];
1294   b->diag       = bdiag;
1295   b->ilen       = 0;
1296   b->imax       = 0;
1297   b->row        = isrow;
1298   b->col        = iscol;
1299   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1300   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1301   b->icol       = isicol;
1302   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1303   /* In b structure:  Free imax, ilen, old a, old j.
1304      Allocate bdiag, solve_work, new a, new j */
1305   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1306   b->maxnz             = b->nz = bi[n] ;
1307   (*fact)->factor = FACTOR_LU;
1308   (*fact)->info.factor_mallocs    = reallocs;
1309   (*fact)->info.fill_ratio_given  = f;
1310   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1311 
1312   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1313   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1314 
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #include "src/mat/impls/sbaij/seq/sbaij.h"
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1321 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1322 {
1323   Mat            C = *B;
1324   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1325   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1326   IS             ip=b->row,iip = b->icol;
1327   PetscErrorCode ierr;
1328   PetscInt       *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1329   PetscInt       *ai=a->i,*aj=a->j;
1330   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1331   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1332   PetscReal      zeropivot,rs,shiftnz;
1333   PetscReal      shiftpd;
1334   ChShift_Ctx    sctx;
1335   PetscInt       newshift;
1336 
1337   PetscFunctionBegin;
1338   shiftnz   = info->shiftnz;
1339   shiftpd   = info->shiftpd;
1340   zeropivot = info->zeropivot;
1341 
1342   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1343   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1344 
1345   /* initialization */
1346   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1347   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1348   jl   = il + mbs;
1349   rtmp = (MatScalar*)(jl + mbs);
1350 
1351   sctx.shift_amount = 0;
1352   sctx.nshift       = 0;
1353   do {
1354     sctx.chshift = PETSC_FALSE;
1355     for (i=0; i<mbs; i++) {
1356       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1357     }
1358 
1359     for (k = 0; k<mbs; k++){
1360       bval = ba + bi[k];
1361       /* initialize k-th row by the perm[k]-th row of A */
1362       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1363       for (j = jmin; j < jmax; j++){
1364         col = riip[aj[j]];
1365         if (col >= k){ /* only take upper triangular entry */
1366           rtmp[col] = aa[j];
1367           *bval++  = 0.0; /* for in-place factorization */
1368         }
1369       }
1370       /* shift the diagonal of the matrix */
1371       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1372 
1373       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1374       dk = rtmp[k];
1375       i = jl[k]; /* first row to be added to k_th row  */
1376 
1377       while (i < k){
1378         nexti = jl[i]; /* next row to be added to k_th row */
1379 
1380         /* compute multiplier, update diag(k) and U(i,k) */
1381         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1382         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1383         dk += uikdi*ba[ili];
1384         ba[ili] = uikdi; /* -U(i,k) */
1385 
1386         /* add multiple of row i to k-th row */
1387         jmin = ili + 1; jmax = bi[i+1];
1388         if (jmin < jmax){
1389           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1390           /* update il and jl for row i */
1391           il[i] = jmin;
1392           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1393         }
1394         i = nexti;
1395       }
1396 
1397       /* shift the diagonals when zero pivot is detected */
1398       /* compute rs=sum of abs(off-diagonal) */
1399       rs   = 0.0;
1400       jmin = bi[k]+1;
1401       nz   = bi[k+1] - jmin;
1402       if (nz > 0){
1403         bcol = bj + jmin;
1404         while (nz--){
1405           rs += PetscAbsScalar(rtmp[*bcol]);
1406           bcol++;
1407         }
1408       }
1409 
1410       sctx.rs = rs;
1411       sctx.pv = dk;
1412       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1413       if (newshift == 1) break;
1414 
1415       /* copy data into U(k,:) */
1416       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1417       jmin = bi[k]+1; jmax = bi[k+1];
1418       if (jmin < jmax) {
1419         for (j=jmin; j<jmax; j++){
1420           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1421         }
1422         /* add the k-th row into il and jl */
1423         il[k] = jmin;
1424         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1425       }
1426     }
1427   } while (sctx.chshift);
1428   ierr = PetscFree(il);CHKERRQ(ierr);
1429 
1430   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1431   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1432   C->factor       = FACTOR_CHOLESKY;
1433   C->assembled    = PETSC_TRUE;
1434   C->preallocated = PETSC_TRUE;
1435   ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr);
1436   if (sctx.nshift){
1437     if (shiftnz) {
1438       ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1439     } else if (shiftpd) {
1440       ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1441     }
1442   }
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1448 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1449 {
1450   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1451   Mat_SeqSBAIJ       *b;
1452   Mat                B;
1453   PetscErrorCode     ierr;
1454   PetscTruth         perm_identity;
1455   PetscInt           reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1456   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1457   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1458   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1459   PetscReal          fill=info->fill,levels=info->levels;
1460   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1461   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1462   PetscBT            lnkbt;
1463 
1464   PetscFunctionBegin;
1465   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1466   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1467 
1468   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1469   ui[0] = 0;
1470 
1471   /* special case that simply copies fill pattern */
1472   if (!levels && perm_identity) {
1473     for (i=0; i<am; i++) {
1474       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1475     }
1476     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1477     cols = uj;
1478     for (i=0; i<am; i++) {
1479       aj    = a->j + a->diag[i];
1480       ncols = ui[i+1] - ui[i];
1481       for (j=0; j<ncols; j++) *cols++ = *aj++;
1482     }
1483   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1484     /* initialization */
1485     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1486 
1487     /* jl: linked list for storing indices of the pivot rows
1488        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1489     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1490     il         = jl + am;
1491     uj_ptr     = (PetscInt**)(il + am);
1492     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1493     for (i=0; i<am; i++){
1494       jl[i] = am; il[i] = 0;
1495     }
1496 
1497     /* create and initialize a linked list for storing column indices of the active row k */
1498     nlnk = am + 1;
1499     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1500 
1501     /* initial FreeSpace size is fill*(ai[am]+1) */
1502     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1503     current_space = free_space;
1504     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1505     current_space_lvl = free_space_lvl;
1506 
1507     for (k=0; k<am; k++){  /* for each active row k */
1508       /* initialize lnk by the column indices of row rip[k] of A */
1509       nzk   = 0;
1510       ncols = ai[rip[k]+1] - ai[rip[k]];
1511       ncols_upper = 0;
1512       for (j=0; j<ncols; j++){
1513         i = *(aj + ai[rip[k]] + j);
1514         if (rip[i] >= k){ /* only take upper triangular entry */
1515           ajtmp[ncols_upper] = i;
1516           ncols_upper++;
1517         }
1518       }
1519       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1520       nzk += nlnk;
1521 
1522       /* update lnk by computing fill-in for each pivot row to be merged in */
1523       prow = jl[k]; /* 1st pivot row */
1524 
1525       while (prow < k){
1526         nextprow = jl[prow];
1527 
1528         /* merge prow into k-th row */
1529         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1530         jmax = ui[prow+1];
1531         ncols = jmax-jmin;
1532         i     = jmin - ui[prow];
1533         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1534         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1535         j     = *(uj - 1);
1536         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1537         nzk += nlnk;
1538 
1539         /* update il and jl for prow */
1540         if (jmin < jmax){
1541           il[prow] = jmin;
1542           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1543         }
1544         prow = nextprow;
1545       }
1546 
1547       /* if free space is not available, make more free space */
1548       if (current_space->local_remaining<nzk) {
1549         i = am - k + 1; /* num of unfactored rows */
1550         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1551         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1552         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1553         reallocs++;
1554       }
1555 
1556       /* copy data into free_space and free_space_lvl, then initialize lnk */
1557       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1558 
1559       /* add the k-th row into il and jl */
1560       if (nzk > 1){
1561         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1562         jl[k] = jl[i]; jl[i] = k;
1563         il[k] = ui[k] + 1;
1564       }
1565       uj_ptr[k]     = current_space->array;
1566       uj_lvl_ptr[k] = current_space_lvl->array;
1567 
1568       current_space->array           += nzk;
1569       current_space->local_used      += nzk;
1570       current_space->local_remaining -= nzk;
1571 
1572       current_space_lvl->array           += nzk;
1573       current_space_lvl->local_used      += nzk;
1574       current_space_lvl->local_remaining -= nzk;
1575 
1576       ui[k+1] = ui[k] + nzk;
1577     }
1578 
1579 #if defined(PETSC_USE_INFO)
1580     if (ai[am] != 0) {
1581       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1582       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1583       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1584       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1585     } else {
1586       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1587     }
1588 #endif
1589 
1590     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1591     ierr = PetscFree(jl);CHKERRQ(ierr);
1592     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1593 
1594     /* destroy list of free space and other temporary array(s) */
1595     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1596     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1597     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1598     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1599 
1600   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1601 
1602   /* put together the new matrix in MATSEQSBAIJ format */
1603   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1604   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1605   B = *fact;
1606   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1607   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1608 
1609   b    = (Mat_SeqSBAIJ*)B->data;
1610   b->singlemalloc = PETSC_FALSE;
1611   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1612   b->j    = uj;
1613   b->i    = ui;
1614   b->diag = 0;
1615   b->ilen = 0;
1616   b->imax = 0;
1617   b->row  = perm;
1618   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1619   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1620   b->icol = perm;
1621   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1622   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1623   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1624   b->maxnz = b->nz = ui[am];
1625   b->free_a  = PETSC_TRUE;
1626   b->free_ij = PETSC_TRUE;
1627 
1628   B->factor                 = FACTOR_CHOLESKY;
1629   B->info.factor_mallocs    = reallocs;
1630   B->info.fill_ratio_given  = fill;
1631   if (ai[am] != 0) {
1632     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1633   } else {
1634     B->info.fill_ratio_needed = 0.0;
1635   }
1636   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1637   if (perm_identity){
1638     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1639     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1640   }
1641   PetscFunctionReturn(0);
1642 }
1643 
1644 #undef __FUNCT__
1645 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1646 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1647 {
1648   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1649   Mat_SeqSBAIJ       *b;
1650   Mat                B;
1651   PetscErrorCode     ierr;
1652   PetscTruth         perm_identity;
1653   PetscReal          fill = info->fill;
1654   PetscInt           *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1655   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1656   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1657   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1658   PetscBT            lnkbt;
1659   IS                 iperm;
1660 
1661   PetscFunctionBegin;
1662   /* check whether perm is the identity mapping */
1663   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1664   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1665   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1666   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1667 
1668   /* initialization */
1669   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1670   ui[0] = 0;
1671 
1672   /* jl: linked list for storing indices of the pivot rows
1673      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1674   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1675   il     = jl + am;
1676   cols   = il + am;
1677   ui_ptr = (PetscInt**)(cols + am);
1678   for (i=0; i<am; i++){
1679     jl[i] = am; il[i] = 0;
1680   }
1681 
1682   /* create and initialize a linked list for storing column indices of the active row k */
1683   nlnk = am + 1;
1684   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1685 
1686   /* initial FreeSpace size is fill*(ai[am]+1) */
1687   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1688   current_space = free_space;
1689 
1690   for (k=0; k<am; k++){  /* for each active row k */
1691     /* initialize lnk by the column indices of row rip[k] of A */
1692     nzk   = 0;
1693     ncols = ai[rip[k]+1] - ai[rip[k]];
1694     if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1695     ncols_upper = 0;
1696     for (j=0; j<ncols; j++){
1697       i = riip[*(aj + ai[rip[k]] + j)];
1698       if (i >= k){ /* only take upper triangular entry */
1699         cols[ncols_upper] = i;
1700         ncols_upper++;
1701       }
1702     }
1703     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1704     nzk += nlnk;
1705 
1706     /* update lnk by computing fill-in for each pivot row to be merged in */
1707     prow = jl[k]; /* 1st pivot row */
1708 
1709     while (prow < k){
1710       nextprow = jl[prow];
1711       /* merge prow into k-th row */
1712       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1713       jmax = ui[prow+1];
1714       ncols = jmax-jmin;
1715       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1716       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1717       nzk += nlnk;
1718 
1719       /* update il and jl for prow */
1720       if (jmin < jmax){
1721         il[prow] = jmin;
1722         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1723       }
1724       prow = nextprow;
1725     }
1726 
1727     /* if free space is not available, make more free space */
1728     if (current_space->local_remaining<nzk) {
1729       i = am - k + 1; /* num of unfactored rows */
1730       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1731       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1732       reallocs++;
1733     }
1734 
1735     /* copy data into free space, then initialize lnk */
1736     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1737 
1738     /* add the k-th row into il and jl */
1739     if (nzk-1 > 0){
1740       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1741       jl[k] = jl[i]; jl[i] = k;
1742       il[k] = ui[k] + 1;
1743     }
1744     ui_ptr[k] = current_space->array;
1745     current_space->array           += nzk;
1746     current_space->local_used      += nzk;
1747     current_space->local_remaining -= nzk;
1748 
1749     ui[k+1] = ui[k] + nzk;
1750   }
1751 
1752 #if defined(PETSC_USE_INFO)
1753   if (ai[am] != 0) {
1754     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1755     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1756     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1757     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1758   } else {
1759      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1760   }
1761 #endif
1762 
1763   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1764   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1765   ierr = PetscFree(jl);CHKERRQ(ierr);
1766 
1767   /* destroy list of free space and other temporary array(s) */
1768   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1769   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1770   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1771 
1772   /* put together the new matrix in MATSEQSBAIJ format */
1773   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1774   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1775   B    = *fact;
1776   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1777   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1778 
1779   b = (Mat_SeqSBAIJ*)B->data;
1780   b->singlemalloc = PETSC_FALSE;
1781   b->free_a       = PETSC_TRUE;
1782   b->free_ij      = PETSC_TRUE;
1783   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1784   b->j    = uj;
1785   b->i    = ui;
1786   b->diag = 0;
1787   b->ilen = 0;
1788   b->imax = 0;
1789   b->row  = perm;
1790   b->col  = perm;
1791   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1792   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1793   b->icol = iperm;
1794   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1795   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1796   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1797   b->maxnz = b->nz = ui[am];
1798 
1799   B->factor                 = FACTOR_CHOLESKY;
1800   B->info.factor_mallocs    = reallocs;
1801   B->info.fill_ratio_given  = fill;
1802   if (ai[am] != 0) {
1803     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1804   } else {
1805     B->info.fill_ratio_needed = 0.0;
1806   }
1807   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1808   if (perm_identity){
1809     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1810     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1811   }
1812   PetscFunctionReturn(0);
1813 }
1814