xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision a4efd8ea4bb0600a5aef92b2c23cbfa314827193)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*);
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   PetscInt       *c,*r,*ic,i,n = A->rmap->n;
64   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65   PetscInt       *ordcol,*iwk,*iperm,*jw;
66   PetscInt       jmax,lfill,job,*o_i,*o_j;
67   MatScalar      *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
68   PetscReal      af;
69 
70   PetscFunctionBegin;
71 
72   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
73   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
74   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
75   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
76   lfill   = (PetscInt)(info->dtcount/2.0);
77   jmax    = (PetscInt)(info->fill*a->nz);
78 
79 
80   /* ------------------------------------------------------------
81      If reorder=.TRUE., then the original matrix has to be
82      reordered to reflect the user selected ordering scheme, and
83      then de-reordered so it is in it's original format.
84      Because Saad's dperm() is NOT in place, we have to copy
85      the original matrix and allocate more storage. . .
86      ------------------------------------------------------------
87   */
88 
89   /* set reorder to true if either isrow or iscol is not identity */
90   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
91   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
92   reorder = PetscNot(reorder);
93 
94 
95   /* storage for ilu factor */
96   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr);
99   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
100 
101   /* ------------------------------------------------------------
102      Make sure that everything is Fortran formatted (1-Based)
103      ------------------------------------------------------------
104   */
105   for (i=old_i[0];i<old_i[n];i++) {
106     old_j[i]++;
107   }
108   for(i=0;i<n+1;i++) {
109     old_i[i]++;
110   };
111 
112 
113   if (reorder) {
114     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
115     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
116     for(i=0;i<n;i++) {
117       r[i]  = r[i]+1;
118       c[i]  = c[i]+1;
119     }
120     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
122     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr);
123     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124     for (i=0;i<n;i++) {
125       r[i]  = r[i]-1;
126       c[i]  = c[i]-1;
127     }
128     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
129     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
130     o_a = old_a2;
131     o_j = old_j2;
132     o_i = old_i2;
133   } else {
134     o_a = old_a;
135     o_j = old_j;
136     o_i = old_i;
137   }
138 
139   /* ------------------------------------------------------------
140      Call Saad's ilutp() routine to generate the factorization
141      ------------------------------------------------------------
142   */
143 
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
145   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
146   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
147 
148   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149   if (sierr) {
150     switch (sierr) {
151       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
152       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
153       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157     }
158   }
159 
160   ierr = PetscFree(w);CHKERRQ(ierr);
161   ierr = PetscFree(jw);CHKERRQ(ierr);
162 
163   /* ------------------------------------------------------------
164      Saad's routine gives the result in Modified Sparse Row (msr)
165      Convert to Compressed Sparse Row format (csr)
166      ------------------------------------------------------------
167   */
168 
169   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
170   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
171 
172   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
173 
174   ierr = PetscFree(iwk);CHKERRQ(ierr);
175   ierr = PetscFree(wk);CHKERRQ(ierr);
176 
177   if (reorder) {
178     ierr = PetscFree(old_a2);CHKERRQ(ierr);
179     ierr = PetscFree(old_j2);CHKERRQ(ierr);
180     ierr = PetscFree(old_i2);CHKERRQ(ierr);
181   } else {
182     /* fix permutation of old_j that the factorization introduced */
183     for (i=old_i[0]; i<old_i[n]; i++) {
184       old_j[i-1] = iperm[old_j[i-1]-1];
185     }
186   }
187 
188   /* get rid of the shift to indices starting at 1 */
189   for (i=0; i<n+1; i++) {
190     old_i[i]--;
191   }
192   for (i=old_i[0];i<old_i[n];i++) {
193     old_j[i]--;
194   }
195 
196   /* Make the factored matrix 0-based */
197   for (i=0; i<n+1; i++) {
198     new_i[i]--;
199   }
200   for (i=new_i[0];i<new_i[n];i++) {
201     new_j[i]--;
202   }
203 
204   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205   /*-- permute the right-hand-side and solution vectors           --*/
206   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
207   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
208   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
209   for(i=0; i<n; i++) {
210     ordcol[i] = ic[iperm[i]-1];
211   };
212   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
213   ierr = ISDestroy(isicol);CHKERRQ(ierr);
214 
215   ierr = PetscFree(iperm);CHKERRQ(ierr);
216 
217   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
218   ierr = PetscFree(ordcol);CHKERRQ(ierr);
219 
220   /*----- put together the new matrix -----*/
221 
222   ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr);
223   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
224   ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
225   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
226   (*fact)->factor    = MAT_FACTOR_LU;
227   (*fact)->assembled = PETSC_TRUE;
228 
229   b = (Mat_SeqAIJ*)(*fact)->data;
230   b->free_a        = PETSC_TRUE;
231   b->free_ij       = PETSC_TRUE;
232   b->singlemalloc  = PETSC_FALSE;
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   af = ((double)b->nz)/((double)a->nz) + .001;
247   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr);
248   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
249   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
250   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
251 
252   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
253 
254   PetscFunctionReturn(0);
255 #endif
256 }
257 
258 EXTERN_C_BEGIN
259 #undef __FUNCT__
260 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
261 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
262 {
263   PetscFunctionBegin;
264   *flg = PETSC_TRUE;
265   PetscFunctionReturn(0);
266 }
267 EXTERN_C_END
268 
269 EXTERN_C_BEGIN
270 #undef __FUNCT__
271 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
272 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
273 {
274   PetscInt           n = A->rmap->n;
275   PetscErrorCode     ierr;
276 
277   PetscFunctionBegin;
278   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
279   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
280   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
281     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
282     (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
283     (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ;
284   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
285     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
286     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
287     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
288     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
289   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
290   (*B)->factor = ftype;
291   PetscFunctionReturn(0);
292 }
293 EXTERN_C_END
294 
295 #undef __FUNCT__
296 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
297 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
298 {
299   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
300   IS                 isicol;
301   PetscErrorCode     ierr;
302   PetscInt           *r,*ic,i,n=A->rmap->n,*ai=a->i,*aj=a->j;
303   PetscInt           *bi,*bj,*ajtmp;
304   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
305   PetscReal          f;
306   PetscInt           nlnk,*lnk,k,**bi_ptr;
307   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
308   PetscBT            lnkbt;
309 
310   PetscFunctionBegin;
311   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
312   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
313   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
314   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
315 
316   /* get new row pointers */
317   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
318   bi[0] = 0;
319 
320   /* bdiag is location of diagonal in factor */
321   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
322   bdiag[0] = 0;
323 
324   /* linked list for storing column indices of the active row */
325   nlnk = n + 1;
326   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
327 
328   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
329 
330   /* initial FreeSpace size is f*(ai[n]+1) */
331   f = info->fill;
332   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
333   current_space = free_space;
334 
335   for (i=0; i<n; i++) {
336     /* copy previous fill into linked list */
337     nzi = 0;
338     nnz = ai[r[i]+1] - ai[r[i]];
339     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
340     ajtmp = aj + ai[r[i]];
341     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
342     nzi += nlnk;
343 
344     /* add pivot rows into linked list */
345     row = lnk[n];
346     while (row < i) {
347       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
348       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
349       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
350       nzi += nlnk;
351       row  = lnk[row];
352     }
353     bi[i+1] = bi[i] + nzi;
354     im[i]   = nzi;
355 
356     /* mark bdiag */
357     nzbd = 0;
358     nnz  = nzi;
359     k    = lnk[n];
360     while (nnz-- && k < i){
361       nzbd++;
362       k = lnk[k];
363     }
364     bdiag[i] = bi[i] + nzbd;
365 
366     /* if free space is not available, make more free space */
367     if (current_space->local_remaining<nzi) {
368       nnz = (n - i)*nzi; /* estimated and max additional space needed */
369       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
370       reallocs++;
371     }
372 
373     /* copy data into free space, then initialize lnk */
374     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
375     bi_ptr[i] = current_space->array;
376     current_space->array           += nzi;
377     current_space->local_used      += nzi;
378     current_space->local_remaining -= nzi;
379   }
380 #if defined(PETSC_USE_INFO)
381   if (ai[n] != 0) {
382     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
383     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
384     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
385     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
386     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
387   } else {
388     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
389   }
390 #endif
391 
392   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
393   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
394 
395   /* destroy list of free space and other temporary array(s) */
396   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
397   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
398   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
399   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
400 
401   /* put together the new matrix */
402   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
403   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
404   b    = (Mat_SeqAIJ*)(*B)->data;
405   b->free_a       = PETSC_TRUE;
406   b->free_ij      = PETSC_TRUE;
407   b->singlemalloc = PETSC_FALSE;
408   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
409   b->j          = bj;
410   b->i          = bi;
411   b->diag       = bdiag;
412   b->ilen       = 0;
413   b->imax       = 0;
414   b->row        = isrow;
415   b->col        = iscol;
416   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
417   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
418   b->icol       = isicol;
419   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
420 
421   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
422   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
423   b->maxnz = b->nz = bi[n] ;
424 
425   (*B)->factor                 =  MAT_FACTOR_LU;
426   (*B)->info.factor_mallocs    = reallocs;
427   (*B)->info.fill_ratio_given  = f;
428 
429   if (ai[n] != 0) {
430     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
431   } else {
432     (*B)->info.fill_ratio_needed = 0.0;
433   }
434   (*B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
435   (*B)->ops->solve            = MatSolve_SeqAIJ;
436   (*B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
437   /* switch to inodes if appropriate */
438   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 /*
443     Trouble in factorization, should we dump the original matrix?
444 */
445 #undef __FUNCT__
446 #define __FUNCT__ "MatFactorDumpMatrix"
447 PetscErrorCode MatFactorDumpMatrix(Mat A)
448 {
449   PetscErrorCode ierr;
450   PetscTruth     flg;
451 
452   PetscFunctionBegin;
453   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr);
454   if (flg) {
455     PetscViewer viewer;
456     char        filename[PETSC_MAX_PATH_LEN];
457 
458     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
459     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
460     ierr = MatView(A,viewer);CHKERRQ(ierr);
461     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
462   }
463   PetscFunctionReturn(0);
464 }
465 
466 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec);
467 
468 /* ----------------------------------------------------------- */
469 #undef __FUNCT__
470 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
471 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
472 {
473   Mat            C=*B;
474   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
475   IS             isrow = b->row,isicol = b->icol;
476   PetscErrorCode ierr;
477   PetscInt       *r,*ic,i,j,n=A->rmap->n,*bi=b->i,*bj=b->j;
478   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
479   PetscInt       *diag_offset = b->diag,diag,*pj;
480   PetscScalar    *rtmp,*pc,multiplier,*rtmps;
481   MatScalar      *v,*pv;
482   PetscScalar    d;
483   PetscReal      rs;
484   LUShift_Ctx    sctx;
485   PetscInt       newshift,*ddiag;
486 
487   PetscFunctionBegin;
488   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
489   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
490   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
491   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
492   rtmps = rtmp; ics = ic;
493 
494   sctx.shift_top  = 0;
495   sctx.nshift_max = 0;
496   sctx.shift_lo   = 0;
497   sctx.shift_hi   = 0;
498 
499   /* if both shift schemes are chosen by user, only use info->shiftpd */
500   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
501   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
502     PetscInt *aai = a->i;
503     ddiag          = a->diag;
504     sctx.shift_top = 0;
505     for (i=0; i<n; i++) {
506       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
507       d  = (a->a)[ddiag[i]];
508       rs = -PetscAbsScalar(d) - PetscRealPart(d);
509       v  = a->a+aai[i];
510       nz = aai[i+1] - aai[i];
511       for (j=0; j<nz; j++)
512 	rs += PetscAbsScalar(v[j]);
513       if (rs>sctx.shift_top) sctx.shift_top = rs;
514     }
515     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
516     sctx.shift_top    *= 1.1;
517     sctx.nshift_max   = 5;
518     sctx.shift_lo     = 0.;
519     sctx.shift_hi     = 1.;
520   }
521 
522   sctx.shift_amount = 0;
523   sctx.nshift       = 0;
524   do {
525     sctx.lushift = PETSC_FALSE;
526     for (i=0; i<n; i++){
527       nz    = bi[i+1] - bi[i];
528       bjtmp = bj + bi[i];
529       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
530 
531       /* load in initial (unfactored row) */
532       nz    = a->i[r[i]+1] - a->i[r[i]];
533       ajtmp = a->j + a->i[r[i]];
534       v     = a->a + a->i[r[i]];
535       for (j=0; j<nz; j++) {
536         rtmp[ics[ajtmp[j]]] = v[j];
537       }
538       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
539 
540       row = *bjtmp++;
541       while  (row < i) {
542         pc = rtmp + row;
543         if (*pc != 0.0) {
544           pv         = b->a + diag_offset[row];
545           pj         = b->j + diag_offset[row] + 1;
546           multiplier = *pc / *pv++;
547           *pc        = multiplier;
548           nz         = bi[row+1] - diag_offset[row] - 1;
549           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
550           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
551         }
552         row = *bjtmp++;
553       }
554       /* finished row so stick it into b->a */
555       pv   = b->a + bi[i] ;
556       pj   = b->j + bi[i] ;
557       nz   = bi[i+1] - bi[i];
558       diag = diag_offset[i] - bi[i];
559       rs   = 0.0;
560       for (j=0; j<nz; j++) {
561         pv[j] = rtmps[pj[j]];
562         if (j != diag) rs += PetscAbsScalar(pv[j]);
563       }
564 
565       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
566       sctx.rs  = rs;
567       sctx.pv  = pv[diag];
568       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
569       if (newshift == 1) break;
570     }
571 
572     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
573       /*
574        * if no shift in this attempt & shifting & started shifting & can refine,
575        * then try lower shift
576        */
577       sctx.shift_hi        = info->shift_fraction;
578       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
579       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
580       sctx.lushift         = PETSC_TRUE;
581       sctx.nshift++;
582     }
583   } while (sctx.lushift);
584 
585   /* invert diagonal entries for simplier triangular solves */
586   for (i=0; i<n; i++) {
587     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
588   }
589   ierr = PetscFree(rtmp);CHKERRQ(ierr);
590   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
591   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
592   if (b->inode.use) {
593     C->ops->solve   = MatSolve_Inode;
594   } else {
595     C->ops->solve   = MatSolve_SeqAIJ;
596   }
597   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
598   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
599   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
600   C->ops->matsolve           = MatMatSolve_SeqAIJ;
601   C->assembled    = PETSC_TRUE;
602   C->preallocated = PETSC_TRUE;
603   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
604   if (sctx.nshift){
605     if (info->shiftnz) {
606       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
607     } else if (info->shiftpd) {
608       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
609     }
610   }
611   PetscFunctionReturn(0);
612 }
613 
614 /*
615    This routine implements inplace ILU(0) with row or/and column permutations.
616    Input:
617      A - original matrix
618    Output;
619      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
620          a->j (col index) is permuted by the inverse of colperm, then sorted
621          a->a reordered accordingly with a->j
622          a->diag (ptr to diagonal elements) is updated.
623 */
624 #undef __FUNCT__
625 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
626 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B)
627 {
628   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
629   IS             isrow = a->row,isicol = a->icol;
630   PetscErrorCode ierr;
631   PetscInt       *r,*ic,i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
632   PetscInt       *ajtmp,nz,row,*ics;
633   PetscInt       *diag = a->diag,nbdiag,*pj;
634   PetscScalar    *rtmp,*pc,multiplier,d;
635   MatScalar      *v,*pv;
636   PetscReal      rs;
637   LUShift_Ctx    sctx;
638   PetscInt       newshift;
639 
640   PetscFunctionBegin;
641   if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
642   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
643   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
644   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
645   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
646   ics = ic;
647 
648   sctx.shift_top  = 0;
649   sctx.nshift_max = 0;
650   sctx.shift_lo   = 0;
651   sctx.shift_hi   = 0;
652 
653   /* if both shift schemes are chosen by user, only use info->shiftpd */
654   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
655   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
656     sctx.shift_top = 0;
657     for (i=0; i<n; i++) {
658       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
659       d  = (a->a)[diag[i]];
660       rs = -PetscAbsScalar(d) - PetscRealPart(d);
661       v  = a->a+ai[i];
662       nz = ai[i+1] - ai[i];
663       for (j=0; j<nz; j++)
664 	rs += PetscAbsScalar(v[j]);
665       if (rs>sctx.shift_top) sctx.shift_top = rs;
666     }
667     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
668     sctx.shift_top    *= 1.1;
669     sctx.nshift_max   = 5;
670     sctx.shift_lo     = 0.;
671     sctx.shift_hi     = 1.;
672   }
673 
674   sctx.shift_amount = 0;
675   sctx.nshift       = 0;
676   do {
677     sctx.lushift = PETSC_FALSE;
678     for (i=0; i<n; i++){
679       /* load in initial unfactored row */
680       nz    = ai[r[i]+1] - ai[r[i]];
681       ajtmp = aj + ai[r[i]];
682       v     = a->a + ai[r[i]];
683       /* sort permuted ajtmp and values v accordingly */
684       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
685       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
686 
687       diag[r[i]] = ai[r[i]];
688       for (j=0; j<nz; j++) {
689         rtmp[ajtmp[j]] = v[j];
690         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
691       }
692       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
693 
694       row = *ajtmp++;
695       while  (row < i) {
696         pc = rtmp + row;
697         if (*pc != 0.0) {
698           pv         = a->a + diag[r[row]];
699           pj         = aj + diag[r[row]] + 1;
700 
701           multiplier = *pc / *pv++;
702           *pc        = multiplier;
703           nz         = ai[r[row]+1] - diag[r[row]] - 1;
704           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
705           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
706         }
707         row = *ajtmp++;
708       }
709       /* finished row so overwrite it onto a->a */
710       pv   = a->a + ai[r[i]] ;
711       pj   = aj + ai[r[i]] ;
712       nz   = ai[r[i]+1] - ai[r[i]];
713       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
714 
715       rs   = 0.0;
716       for (j=0; j<nz; j++) {
717         pv[j] = rtmp[pj[j]];
718         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
719       }
720 
721       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
722       sctx.rs  = rs;
723       sctx.pv  = pv[nbdiag];
724       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
725       if (newshift == 1) break;
726     }
727 
728     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
729       /*
730        * if no shift in this attempt & shifting & started shifting & can refine,
731        * then try lower shift
732        */
733       sctx.shift_hi        = info->shift_fraction;
734       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
735       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
736       sctx.lushift         = PETSC_TRUE;
737       sctx.nshift++;
738     }
739   } while (sctx.lushift);
740 
741   /* invert diagonal entries for simplier triangular solves */
742   for (i=0; i<n; i++) {
743     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
744   }
745 
746   ierr = PetscFree(rtmp);CHKERRQ(ierr);
747   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
748   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
749   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
750   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
751   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
752   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
753   A->assembled = PETSC_TRUE;
754   A->preallocated = PETSC_TRUE;
755   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
756   if (sctx.nshift){
757     if (info->shiftnz) {
758       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
759     } else if (info->shiftpd) {
760       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
761     }
762   }
763   PetscFunctionReturn(0);
764 }
765 
766 /* ----------------------------------------------------------- */
767 #undef __FUNCT__
768 #define __FUNCT__ "MatLUFactor_SeqAIJ"
769 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
770 {
771   PetscErrorCode ierr;
772   Mat            C;
773 
774   PetscFunctionBegin;
775   ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
776   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
777   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
778   A->ops->solve            = C->ops->solve;
779   A->ops->solvetranspose   = C->ops->solvetranspose;
780   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
781   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
782   PetscFunctionReturn(0);
783 }
784 /* ----------------------------------------------------------- */
785 #undef __FUNCT__
786 #define __FUNCT__ "MatSolve_SeqAIJ"
787 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
788 {
789   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
790   IS                iscol = a->col,isrow = a->row;
791   PetscErrorCode    ierr;
792   PetscInt          *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
793   PetscInt          nz,*rout,*cout;
794   PetscScalar       *x,*tmp,*tmps,sum;
795   const PetscScalar *b;
796   const MatScalar   *aa = a->a,*v;
797 
798   PetscFunctionBegin;
799   if (!n) PetscFunctionReturn(0);
800 
801   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
802   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
803   tmp  = a->solve_work;
804 
805   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
806   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
807 
808   /* forward solve the lower triangular */
809   tmp[0] = b[*r++];
810   tmps   = tmp;
811   for (i=1; i<n; i++) {
812     v   = aa + ai[i] ;
813     vi  = aj + ai[i] ;
814     nz  = a->diag[i] - ai[i];
815     sum = b[*r++];
816     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
817     tmp[i] = sum;
818   }
819 
820   /* backward solve the upper triangular */
821   for (i=n-1; i>=0; i--){
822     v   = aa + a->diag[i] + 1;
823     vi  = aj + a->diag[i] + 1;
824     nz  = ai[i+1] - a->diag[i] - 1;
825     sum = tmp[i];
826     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
827     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
828   }
829 
830   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
831   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
832   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
833   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
834   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatMatSolve_SeqAIJ"
840 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
841 {
842   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
843   IS              iscol = a->col,isrow = a->row;
844   PetscErrorCode  ierr;
845   PetscInt        *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
846   PetscInt        nz,*rout,*cout,neq;
847   PetscScalar     *x,*b,*tmp,*tmps,sum;
848   const MatScalar *aa = a->a,*v;
849   PetscTruth      bisdense,xisdense;
850 
851   PetscFunctionBegin;
852   if (!n) PetscFunctionReturn(0);
853 
854   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
855   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
856   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
857   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
858 
859   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
860   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
861 
862   tmp  = a->solve_work;
863   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
864   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
865 
866   for (neq=0; neq<B->cmap->n; neq++){
867     /* forward solve the lower triangular */
868     tmp[0] = b[r[0]];
869     tmps   = tmp;
870     for (i=1; i<n; i++) {
871       v   = aa + ai[i] ;
872       vi  = aj + ai[i] ;
873       nz  = a->diag[i] - ai[i];
874       sum = b[r[i]];
875       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
876       tmp[i] = sum;
877     }
878     /* backward solve the upper triangular */
879     for (i=n-1; i>=0; i--){
880       v   = aa + a->diag[i] + 1;
881       vi  = aj + a->diag[i] + 1;
882       nz  = ai[i+1] - a->diag[i] - 1;
883       sum = tmp[i];
884       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
885       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
886     }
887 
888     b += n;
889     x += n;
890   }
891   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
892   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
893   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
894   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
895   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
896   PetscFunctionReturn(0);
897 }
898 
899 #undef __FUNCT__
900 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
901 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
902 {
903   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
904   IS              iscol = a->col,isrow = a->row;
905   PetscErrorCode  ierr;
906   PetscInt        *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
907   PetscInt        nz,*rout,*cout,row;
908   PetscScalar     *x,*b,*tmp,*tmps,sum;
909   const MatScalar *aa = a->a,*v;
910 
911   PetscFunctionBegin;
912   if (!n) PetscFunctionReturn(0);
913 
914   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
915   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
916   tmp  = a->solve_work;
917 
918   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
919   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
920 
921   /* forward solve the lower triangular */
922   tmp[0] = b[*r++];
923   tmps   = tmp;
924   for (row=1; row<n; row++) {
925     i   = rout[row]; /* permuted row */
926     v   = aa + ai[i] ;
927     vi  = aj + ai[i] ;
928     nz  = a->diag[i] - ai[i];
929     sum = b[*r++];
930     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
931     tmp[row] = sum;
932   }
933 
934   /* backward solve the upper triangular */
935   for (row=n-1; row>=0; row--){
936     i   = rout[row]; /* permuted row */
937     v   = aa + a->diag[i] + 1;
938     vi  = aj + a->diag[i] + 1;
939     nz  = ai[i+1] - a->diag[i] - 1;
940     sum = tmp[row];
941     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
942     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
943   }
944 
945   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
946   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
947   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
948   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
949   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
950   PetscFunctionReturn(0);
951 }
952 
953 /* ----------------------------------------------------------- */
954 #undef __FUNCT__
955 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
956 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
957 {
958   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
959   PetscErrorCode    ierr;
960   PetscInt          n = A->rmap->n;
961   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
962   PetscScalar       *x;
963   const PetscScalar *b;
964   const MatScalar   *aa = a->a;
965 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
966   PetscInt          adiag_i,i,nz,ai_i;
967   const MatScalar   *v;
968   PetscScalar       sum;
969 #endif
970 
971   PetscFunctionBegin;
972   if (!n) PetscFunctionReturn(0);
973 
974   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
975   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
976 
977 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
978   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
979 #else
980   /* forward solve the lower triangular */
981   x[0] = b[0];
982   for (i=1; i<n; i++) {
983     ai_i = ai[i];
984     v    = aa + ai_i;
985     vi   = aj + ai_i;
986     nz   = adiag[i] - ai_i;
987     sum  = b[i];
988     while (nz--) sum -= *v++ * x[*vi++];
989     x[i] = sum;
990   }
991 
992   /* backward solve the upper triangular */
993   for (i=n-1; i>=0; i--){
994     adiag_i = adiag[i];
995     v       = aa + adiag_i + 1;
996     vi      = aj + adiag_i + 1;
997     nz      = ai[i+1] - adiag_i - 1;
998     sum     = x[i];
999     while (nz--) sum -= *v++ * x[*vi++];
1000     x[i]    = sum*aa[adiag_i];
1001   }
1002 #endif
1003   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
1004   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1005   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1011 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1012 {
1013   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1014   IS              iscol = a->col,isrow = a->row;
1015   PetscErrorCode  ierr;
1016   PetscInt        *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1017   PetscInt        nz,*rout,*cout;
1018   PetscScalar     *x,*b,*tmp,sum;
1019   const MatScalar *aa = a->a,*v;
1020 
1021   PetscFunctionBegin;
1022   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1023 
1024   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1025   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1026   tmp  = a->solve_work;
1027 
1028   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1029   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1030 
1031   /* forward solve the lower triangular */
1032   tmp[0] = b[*r++];
1033   for (i=1; i<n; i++) {
1034     v   = aa + ai[i] ;
1035     vi  = aj + ai[i] ;
1036     nz  = a->diag[i] - ai[i];
1037     sum = b[*r++];
1038     while (nz--) sum -= *v++ * tmp[*vi++ ];
1039     tmp[i] = sum;
1040   }
1041 
1042   /* backward solve the upper triangular */
1043   for (i=n-1; i>=0; i--){
1044     v   = aa + a->diag[i] + 1;
1045     vi  = aj + a->diag[i] + 1;
1046     nz  = ai[i+1] - a->diag[i] - 1;
1047     sum = tmp[i];
1048     while (nz--) sum -= *v++ * tmp[*vi++ ];
1049     tmp[i] = sum*aa[a->diag[i]];
1050     x[*c--] += tmp[i];
1051   }
1052 
1053   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1054   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1055   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1056   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1057   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1058 
1059   PetscFunctionReturn(0);
1060 }
1061 /* -------------------------------------------------------------------*/
1062 #undef __FUNCT__
1063 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1064 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1065 {
1066   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1067   IS              iscol = a->col,isrow = a->row;
1068   PetscErrorCode  ierr;
1069   PetscInt        *r,*c,i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1070   PetscInt        nz,*rout,*cout,*diag = a->diag;
1071   PetscScalar     *x,*b,*tmp,s1;
1072   const MatScalar *aa = a->a,*v;
1073 
1074   PetscFunctionBegin;
1075   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1076   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1077   tmp  = a->solve_work;
1078 
1079   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1080   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1081 
1082   /* copy the b into temp work space according to permutation */
1083   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1084 
1085   /* forward solve the U^T */
1086   for (i=0; i<n; i++) {
1087     v   = aa + diag[i] ;
1088     vi  = aj + diag[i] + 1;
1089     nz  = ai[i+1] - diag[i] - 1;
1090     s1  = tmp[i];
1091     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1092     while (nz--) {
1093       tmp[*vi++ ] -= (*v++)*s1;
1094     }
1095     tmp[i] = s1;
1096   }
1097 
1098   /* backward solve the L^T */
1099   for (i=n-1; i>=0; i--){
1100     v   = aa + diag[i] - 1 ;
1101     vi  = aj + diag[i] - 1 ;
1102     nz  = diag[i] - ai[i];
1103     s1  = tmp[i];
1104     while (nz--) {
1105       tmp[*vi-- ] -= (*v--)*s1;
1106     }
1107   }
1108 
1109   /* copy tmp into x according to permutation */
1110   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1111 
1112   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1113   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1114   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1115   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1116 
1117   ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1123 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1124 {
1125   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1126   IS              iscol = a->col,isrow = a->row;
1127   PetscErrorCode  ierr;
1128   PetscInt        *r,*c,i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1129   PetscInt        nz,*rout,*cout,*diag = a->diag;
1130   PetscScalar     *x,*b,*tmp;
1131   const MatScalar *aa = a->a,*v;
1132 
1133   PetscFunctionBegin;
1134   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1135 
1136   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1137   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1138   tmp = a->solve_work;
1139 
1140   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1141   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1142 
1143   /* copy the b into temp work space according to permutation */
1144   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1145 
1146   /* forward solve the U^T */
1147   for (i=0; i<n; i++) {
1148     v   = aa + diag[i] ;
1149     vi  = aj + diag[i] + 1;
1150     nz  = ai[i+1] - diag[i] - 1;
1151     tmp[i] *= *v++;
1152     while (nz--) {
1153       tmp[*vi++ ] -= (*v++)*tmp[i];
1154     }
1155   }
1156 
1157   /* backward solve the L^T */
1158   for (i=n-1; i>=0; i--){
1159     v   = aa + diag[i] - 1 ;
1160     vi  = aj + diag[i] - 1 ;
1161     nz  = diag[i] - ai[i];
1162     while (nz--) {
1163       tmp[*vi-- ] -= (*v--)*tmp[i];
1164     }
1165   }
1166 
1167   /* copy tmp into x according to permutation */
1168   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1169 
1170   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1171   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1172   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1173   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1174 
1175   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1176   PetscFunctionReturn(0);
1177 }
1178 /* ----------------------------------------------------------------*/
1179 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1180 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,MatDuplicateOption,Mat*);
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1184 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
1185 {
1186   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1187   IS                 isicol;
1188   PetscErrorCode     ierr;
1189   PetscInt           *r,*ic,n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1190   PetscInt           *bi,*cols,nnz,*cols_lvl;
1191   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1192   PetscInt           i,levels,diagonal_fill;
1193   PetscTruth         col_identity,row_identity;
1194   PetscReal          f;
1195   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1196   PetscBT            lnkbt;
1197   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1198   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1199   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1200   PetscTruth         missing;
1201 
1202   PetscFunctionBegin;
1203   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1204   f             = info->fill;
1205   levels        = (PetscInt)info->levels;
1206   diagonal_fill = (PetscInt)info->diagonal_fill;
1207   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1208 
1209   /* special case that simply copies fill pattern */
1210   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1211   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1212   if (!levels && row_identity && col_identity) {
1213     ierr = MatDuplicateNoCreate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
1214     (*fact)->factor                 = MAT_FACTOR_LU;
1215     (*fact)->info.factor_mallocs    = 0;
1216     (*fact)->info.fill_ratio_given  = info->fill;
1217     (*fact)->info.fill_ratio_needed = 1.0;
1218     b               = (Mat_SeqAIJ*)(*fact)->data;
1219     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1220     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1221     b->row              = isrow;
1222     b->col              = iscol;
1223     b->icol             = isicol;
1224     ierr                = PetscMalloc(((*fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1225     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1226     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1227     (*fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1228     ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1229     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
1230     PetscFunctionReturn(0);
1231   }
1232 
1233   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1234   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1235 
1236   /* get new row pointers */
1237   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1238   bi[0] = 0;
1239   /* bdiag is location of diagonal in factor */
1240   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1241   bdiag[0]  = 0;
1242 
1243   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1244   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1245 
1246   /* create a linked list for storing column indices of the active row */
1247   nlnk = n + 1;
1248   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1249 
1250   /* initial FreeSpace size is f*(ai[n]+1) */
1251   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1252   current_space = free_space;
1253   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1254   current_space_lvl = free_space_lvl;
1255 
1256   for (i=0; i<n; i++) {
1257     nzi = 0;
1258     /* copy current row into linked list */
1259     nnz  = ai[r[i]+1] - ai[r[i]];
1260     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1261     cols = aj + ai[r[i]];
1262     lnk[i] = -1; /* marker to indicate if diagonal exists */
1263     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1264     nzi += nlnk;
1265 
1266     /* make sure diagonal entry is included */
1267     if (diagonal_fill && lnk[i] == -1) {
1268       fm = n;
1269       while (lnk[fm] < i) fm = lnk[fm];
1270       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1271       lnk[fm]    = i;
1272       lnk_lvl[i] = 0;
1273       nzi++; dcount++;
1274     }
1275 
1276     /* add pivot rows into the active row */
1277     nzbd = 0;
1278     prow = lnk[n];
1279     while (prow < i) {
1280       nnz      = bdiag[prow];
1281       cols     = bj_ptr[prow] + nnz + 1;
1282       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1283       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1284       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1285       nzi += nlnk;
1286       prow = lnk[prow];
1287       nzbd++;
1288     }
1289     bdiag[i] = nzbd;
1290     bi[i+1]  = bi[i] + nzi;
1291 
1292     /* if free space is not available, make more free space */
1293     if (current_space->local_remaining<nzi) {
1294       nnz = nzi*(n - i); /* estimated and max additional space needed */
1295       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1296       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1297       reallocs++;
1298     }
1299 
1300     /* copy data into free_space and free_space_lvl, then initialize lnk */
1301     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1302     bj_ptr[i]    = current_space->array;
1303     bjlvl_ptr[i] = current_space_lvl->array;
1304 
1305     /* make sure the active row i has diagonal entry */
1306     if (*(bj_ptr[i]+bdiag[i]) != i) {
1307       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1308     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1309     }
1310 
1311     current_space->array           += nzi;
1312     current_space->local_used      += nzi;
1313     current_space->local_remaining -= nzi;
1314     current_space_lvl->array           += nzi;
1315     current_space_lvl->local_used      += nzi;
1316     current_space_lvl->local_remaining -= nzi;
1317   }
1318 
1319   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1320   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1321 
1322   /* destroy list of free space and other temporary arrays */
1323   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1324   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1325   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1326   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1327   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1328 
1329 #if defined(PETSC_USE_INFO)
1330   {
1331     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1332     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1333     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1334     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1335     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1336     if (diagonal_fill) {
1337       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1338     }
1339   }
1340 #endif
1341 
1342   /* put together the new matrix */
1343   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1344   b = (Mat_SeqAIJ*)(*fact)->data;
1345   b->free_a       = PETSC_TRUE;
1346   b->free_ij      = PETSC_TRUE;
1347   b->singlemalloc = PETSC_FALSE;
1348   len = (bi[n] )*sizeof(PetscScalar);
1349   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1350   b->j          = bj;
1351   b->i          = bi;
1352   for (i=0; i<n; i++) bdiag[i] += bi[i];
1353   b->diag       = bdiag;
1354   b->ilen       = 0;
1355   b->imax       = 0;
1356   b->row        = isrow;
1357   b->col        = iscol;
1358   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1359   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1360   b->icol       = isicol;
1361   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1362   /* In b structure:  Free imax, ilen, old a, old j.
1363      Allocate bdiag, solve_work, new a, new j */
1364   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1365   b->maxnz             = b->nz = bi[n] ;
1366   (*fact)->factor = MAT_FACTOR_LU;
1367   (*fact)->info.factor_mallocs    = reallocs;
1368   (*fact)->info.fill_ratio_given  = f;
1369   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1370   (*fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1371   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #include "src/mat/impls/sbaij/seq/sbaij.h"
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1378 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1379 {
1380   Mat            C = *B;
1381   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1382   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1383   IS             ip=b->row,iip = b->icol;
1384   PetscErrorCode ierr;
1385   PetscInt       *rip,*riip,i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1386   PetscInt       *ai=a->i,*aj=a->j;
1387   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1388   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1389   PetscReal      zeropivot,rs,shiftnz;
1390   PetscReal      shiftpd;
1391   ChShift_Ctx    sctx;
1392   PetscInt       newshift;
1393   PetscTruth     perm_identity;
1394 
1395   PetscFunctionBegin;
1396 
1397   shiftnz   = info->shiftnz;
1398   shiftpd   = info->shiftpd;
1399   zeropivot = info->zeropivot;
1400 
1401   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1402   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1403 
1404   /* initialization */
1405   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1406   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1407   jl   = il + mbs;
1408   rtmp = (MatScalar*)(jl + mbs);
1409 
1410   sctx.shift_amount = 0;
1411   sctx.nshift       = 0;
1412   do {
1413     sctx.chshift = PETSC_FALSE;
1414     for (i=0; i<mbs; i++) {
1415       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1416     }
1417 
1418     for (k = 0; k<mbs; k++){
1419       bval = ba + bi[k];
1420       /* initialize k-th row by the perm[k]-th row of A */
1421       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1422       for (j = jmin; j < jmax; j++){
1423         col = riip[aj[j]];
1424         if (col >= k){ /* only take upper triangular entry */
1425           rtmp[col] = aa[j];
1426           *bval++  = 0.0; /* for in-place factorization */
1427         }
1428       }
1429       /* shift the diagonal of the matrix */
1430       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1431 
1432       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1433       dk = rtmp[k];
1434       i = jl[k]; /* first row to be added to k_th row  */
1435 
1436       while (i < k){
1437         nexti = jl[i]; /* next row to be added to k_th row */
1438 
1439         /* compute multiplier, update diag(k) and U(i,k) */
1440         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1441         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1442         dk += uikdi*ba[ili];
1443         ba[ili] = uikdi; /* -U(i,k) */
1444 
1445         /* add multiple of row i to k-th row */
1446         jmin = ili + 1; jmax = bi[i+1];
1447         if (jmin < jmax){
1448           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1449           /* update il and jl for row i */
1450           il[i] = jmin;
1451           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1452         }
1453         i = nexti;
1454       }
1455 
1456       /* shift the diagonals when zero pivot is detected */
1457       /* compute rs=sum of abs(off-diagonal) */
1458       rs   = 0.0;
1459       jmin = bi[k]+1;
1460       nz   = bi[k+1] - jmin;
1461       bcol = bj + jmin;
1462       while (nz--){
1463         rs += PetscAbsScalar(rtmp[*bcol]);
1464         bcol++;
1465       }
1466 
1467       sctx.rs = rs;
1468       sctx.pv = dk;
1469       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1470 
1471       if (newshift == 1) {
1472         if (!sctx.shift_amount) {
1473           sctx.shift_amount = 1e-5;
1474         }
1475         break;
1476       }
1477 
1478       /* copy data into U(k,:) */
1479       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1480       jmin = bi[k]+1; jmax = bi[k+1];
1481       if (jmin < jmax) {
1482         for (j=jmin; j<jmax; j++){
1483           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1484         }
1485         /* add the k-th row into il and jl */
1486         il[k] = jmin;
1487         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1488       }
1489     }
1490   } while (sctx.chshift);
1491   ierr = PetscFree(il);CHKERRQ(ierr);
1492 
1493   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1494   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1495 
1496   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
1497   if (perm_identity){
1498     (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1499     (*B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1500     (*B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1501     (*B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1502   } else {
1503     (*B)->ops->solve           = MatSolve_SeqSBAIJ_1;
1504     (*B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1505     (*B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1506     (*B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1507   }
1508 
1509   C->assembled    = PETSC_TRUE;
1510   C->preallocated = PETSC_TRUE;
1511   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
1512   if (sctx.nshift){
1513     if (shiftnz) {
1514       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1515     } else if (shiftpd) {
1516       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1517     }
1518   }
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 #undef __FUNCT__
1523 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1524 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1525 {
1526   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1527   Mat_SeqSBAIJ       *b;
1528   PetscErrorCode     ierr;
1529   PetscTruth         perm_identity,missing;
1530   PetscInt           reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1531   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1532   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1533   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1534   PetscReal          fill=info->fill,levels=info->levels;
1535   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1536   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1537   PetscBT            lnkbt;
1538   IS                 iperm;
1539 
1540   PetscFunctionBegin;
1541   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1542   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1543   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1544   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1545   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1546 
1547   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1548   ui[0] = 0;
1549 
1550   /* ICC(0) without matrix ordering: simply copies fill pattern */
1551   if (!levels && perm_identity) {
1552 
1553     for (i=0; i<am; i++) {
1554       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1555     }
1556     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1557     cols = uj;
1558     for (i=0; i<am; i++) {
1559       aj    = a->j + a->diag[i];
1560       ncols = ui[i+1] - ui[i];
1561       for (j=0; j<ncols; j++) *cols++ = *aj++;
1562     }
1563   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1564     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1565     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1566 
1567     /* initialization */
1568     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1569 
1570     /* jl: linked list for storing indices of the pivot rows
1571        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1572     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1573     il         = jl + am;
1574     uj_ptr     = (PetscInt**)(il + am);
1575     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1576     for (i=0; i<am; i++){
1577       jl[i] = am; il[i] = 0;
1578     }
1579 
1580     /* create and initialize a linked list for storing column indices of the active row k */
1581     nlnk = am + 1;
1582     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1583 
1584     /* initial FreeSpace size is fill*(ai[am]+1) */
1585     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1586     current_space = free_space;
1587     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1588     current_space_lvl = free_space_lvl;
1589 
1590     for (k=0; k<am; k++){  /* for each active row k */
1591       /* initialize lnk by the column indices of row rip[k] of A */
1592       nzk   = 0;
1593       ncols = ai[rip[k]+1] - ai[rip[k]];
1594       if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1595       ncols_upper = 0;
1596       for (j=0; j<ncols; j++){
1597         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1598         if (riip[i] >= k){ /* only take upper triangular entry */
1599           ajtmp[ncols_upper] = i;
1600           ncols_upper++;
1601         }
1602       }
1603       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1604       nzk += nlnk;
1605 
1606       /* update lnk by computing fill-in for each pivot row to be merged in */
1607       prow = jl[k]; /* 1st pivot row */
1608 
1609       while (prow < k){
1610         nextprow = jl[prow];
1611 
1612         /* merge prow into k-th row */
1613         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1614         jmax = ui[prow+1];
1615         ncols = jmax-jmin;
1616         i     = jmin - ui[prow];
1617         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1618         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1619         j     = *(uj - 1);
1620         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1621         nzk += nlnk;
1622 
1623         /* update il and jl for prow */
1624         if (jmin < jmax){
1625           il[prow] = jmin;
1626           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1627         }
1628         prow = nextprow;
1629       }
1630 
1631       /* if free space is not available, make more free space */
1632       if (current_space->local_remaining<nzk) {
1633         i = am - k + 1; /* num of unfactored rows */
1634         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1635         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1636         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1637         reallocs++;
1638       }
1639 
1640       /* copy data into free_space and free_space_lvl, then initialize lnk */
1641       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1642       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1643 
1644       /* add the k-th row into il and jl */
1645       if (nzk > 1){
1646         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1647         jl[k] = jl[i]; jl[i] = k;
1648         il[k] = ui[k] + 1;
1649       }
1650       uj_ptr[k]     = current_space->array;
1651       uj_lvl_ptr[k] = current_space_lvl->array;
1652 
1653       current_space->array           += nzk;
1654       current_space->local_used      += nzk;
1655       current_space->local_remaining -= nzk;
1656 
1657       current_space_lvl->array           += nzk;
1658       current_space_lvl->local_used      += nzk;
1659       current_space_lvl->local_remaining -= nzk;
1660 
1661       ui[k+1] = ui[k] + nzk;
1662     }
1663 
1664 #if defined(PETSC_USE_INFO)
1665     if (ai[am] != 0) {
1666       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1667       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1668       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1669       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1670     } else {
1671       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1672     }
1673 #endif
1674 
1675     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1676     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1677     ierr = PetscFree(jl);CHKERRQ(ierr);
1678     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1679 
1680     /* destroy list of free space and other temporary array(s) */
1681     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1682     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1683     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1684     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1685 
1686   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1687 
1688   /* put together the new matrix in MATSEQSBAIJ format */
1689 
1690   b    = (Mat_SeqSBAIJ*)(*fact)->data;
1691   b->singlemalloc = PETSC_FALSE;
1692   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1693   b->j    = uj;
1694   b->i    = ui;
1695   b->diag = 0;
1696   b->ilen = 0;
1697   b->imax = 0;
1698   b->row  = perm;
1699   b->col  = perm;
1700   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1701   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1702   b->icol = iperm;
1703   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1704   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1705   ierr = PetscLogObjectMemory((*fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1706   b->maxnz   = b->nz = ui[am];
1707   b->free_a  = PETSC_TRUE;
1708   b->free_ij = PETSC_TRUE;
1709 
1710   (*fact)->info.factor_mallocs    = reallocs;
1711   (*fact)->info.fill_ratio_given  = fill;
1712   if (ai[am] != 0) {
1713     (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1714   } else {
1715     (*fact)->info.fill_ratio_needed = 0.0;
1716   }
1717   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1718   PetscFunctionReturn(0);
1719 }
1720 
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1723 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1724 {
1725   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1726   Mat_SeqSBAIJ       *b;
1727   PetscErrorCode     ierr;
1728   PetscTruth         perm_identity;
1729   PetscReal          fill = info->fill;
1730   PetscInt           *rip,*riip,i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1731   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1732   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1733   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1734   PetscBT            lnkbt;
1735   IS                 iperm;
1736 
1737   PetscFunctionBegin;
1738   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1739   /* check whether perm is the identity mapping */
1740   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1741   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1742   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1743   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1744 
1745   /* initialization */
1746   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1747   ui[0] = 0;
1748 
1749   /* jl: linked list for storing indices of the pivot rows
1750      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1751   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1752   il     = jl + am;
1753   cols   = il + am;
1754   ui_ptr = (PetscInt**)(cols + am);
1755   for (i=0; i<am; i++){
1756     jl[i] = am; il[i] = 0;
1757   }
1758 
1759   /* create and initialize a linked list for storing column indices of the active row k */
1760   nlnk = am + 1;
1761   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1762 
1763   /* initial FreeSpace size is fill*(ai[am]+1) */
1764   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1765   current_space = free_space;
1766 
1767   for (k=0; k<am; k++){  /* for each active row k */
1768     /* initialize lnk by the column indices of row rip[k] of A */
1769     nzk   = 0;
1770     ncols = ai[rip[k]+1] - ai[rip[k]];
1771     if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1772     ncols_upper = 0;
1773     for (j=0; j<ncols; j++){
1774       i = riip[*(aj + ai[rip[k]] + j)];
1775       if (i >= k){ /* only take upper triangular entry */
1776         cols[ncols_upper] = i;
1777         ncols_upper++;
1778       }
1779     }
1780     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1781     nzk += nlnk;
1782 
1783     /* update lnk by computing fill-in for each pivot row to be merged in */
1784     prow = jl[k]; /* 1st pivot row */
1785 
1786     while (prow < k){
1787       nextprow = jl[prow];
1788       /* merge prow into k-th row */
1789       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1790       jmax = ui[prow+1];
1791       ncols = jmax-jmin;
1792       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1793       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1794       nzk += nlnk;
1795 
1796       /* update il and jl for prow */
1797       if (jmin < jmax){
1798         il[prow] = jmin;
1799         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1800       }
1801       prow = nextprow;
1802     }
1803 
1804     /* if free space is not available, make more free space */
1805     if (current_space->local_remaining<nzk) {
1806       i = am - k + 1; /* num of unfactored rows */
1807       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1808       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1809       reallocs++;
1810     }
1811 
1812     /* copy data into free space, then initialize lnk */
1813     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1814 
1815     /* add the k-th row into il and jl */
1816     if (nzk-1 > 0){
1817       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1818       jl[k] = jl[i]; jl[i] = k;
1819       il[k] = ui[k] + 1;
1820     }
1821     ui_ptr[k] = current_space->array;
1822     current_space->array           += nzk;
1823     current_space->local_used      += nzk;
1824     current_space->local_remaining -= nzk;
1825 
1826     ui[k+1] = ui[k] + nzk;
1827   }
1828 
1829 #if defined(PETSC_USE_INFO)
1830   if (ai[am] != 0) {
1831     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1832     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1833     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1834     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1835   } else {
1836      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1837   }
1838 #endif
1839 
1840   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1841   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1842   ierr = PetscFree(jl);CHKERRQ(ierr);
1843 
1844   /* destroy list of free space and other temporary array(s) */
1845   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1846   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1847   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1848 
1849   /* put together the new matrix in MATSEQSBAIJ format */
1850 
1851   b = (Mat_SeqSBAIJ*)(*fact)->data;
1852   b->singlemalloc = PETSC_FALSE;
1853   b->free_a       = PETSC_TRUE;
1854   b->free_ij      = PETSC_TRUE;
1855   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1856   b->j    = uj;
1857   b->i    = ui;
1858   b->diag = 0;
1859   b->ilen = 0;
1860   b->imax = 0;
1861   b->row  = perm;
1862   b->col  = perm;
1863   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1864   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1865   b->icol = iperm;
1866   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1867   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1868   ierr    = PetscLogObjectMemory(*fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1869   b->maxnz = b->nz = ui[am];
1870 
1871   (*fact)->info.factor_mallocs    = reallocs;
1872   (*fact)->info.fill_ratio_given  = fill;
1873   if (ai[am] != 0) {
1874     (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1875   } else {
1876     (*fact)->info.fill_ratio_needed = 0.0;
1877   }
1878   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1879   PetscFunctionReturn(0);
1880 }
1881