xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 1f0e42cf17e8fdc56355134e13a5edcdb6dc1c19)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
10 /*
11       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12 
13       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
14 */
15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
16 {
17   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
18   PetscErrorCode    ierr;
19   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20   const PetscInt    *ai = a->i, *aj = a->j;
21   const PetscScalar *aa = a->a;
22   PetscBool         *done;
23   PetscReal         best,past = 0,future;
24 
25   PetscFunctionBegin;
26   /* pick initial row */
27   best = -1;
28   for (i=0; i<n; i++) {
29     future = 0.0;
30     for (j=ai[i]; j<ai[i+1]; j++) {
31       if (aj[j] != i) future  += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
32     }
33     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34     if (past/future > best) {
35       best = past/future;
36       current = i;
37     }
38   }
39 
40   ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr);
41   ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr);
42   ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
43   order[0] = current;
44   for (i=0; i<n-1; i++) {
45     done[current] = PETSC_TRUE;
46     best          = -1;
47     /* loop over all neighbors of current pivot */
48     for (j=ai[current]; j<ai[current+1]; j++) {
49       jj = aj[j];
50       if (done[jj]) continue;
51       /* loop over columns of potential next row computing weights for below and above diagonal */
52       past = future = 0.0;
53       for (k=ai[jj]; k<ai[jj+1]; k++) {
54         kk = aj[k];
55         if (done[kk]) past += PetscAbsScalar(aa[k]);
56         else if (kk != jj) future  += PetscAbsScalar(aa[k]);
57       }
58       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59       if (past/future > best) {
60         best = past/future;
61         newcurrent = jj;
62       }
63     }
64     if (best == -1) { /* no neighbors to select from so select best of all that remain */
65       best = -1;
66       for (k=0; k<n; k++) {
67         if (done[k]) continue;
68         future = 0.0;
69         past   = 0.0;
70         for (j=ai[k]; j<ai[k+1]; j++) {
71           kk = aj[j];
72           if (done[kk]) past += PetscAbsScalar(aa[j]);
73           else if (kk != k) future  += PetscAbsScalar(aa[j]);
74         }
75         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76         if (past/future > best) {
77           best = past/future;
78           newcurrent = k;
79         }
80       }
81     }
82     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83     current = newcurrent;
84     order[i+1] = current;
85   }
86   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
87   *icol = *irow;
88   ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
89   ierr = PetscFree(done);CHKERRQ(ierr);
90   ierr = PetscFree(order);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 EXTERN_C_END
94 
95 EXTERN_C_BEGIN
96 #undef __FUNCT__
97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool  *flg)
99 {
100   PetscFunctionBegin;
101   *flg = PETSC_TRUE;
102   PetscFunctionReturn(0);
103 }
104 EXTERN_C_END
105 
106 EXTERN_C_BEGIN
107 #undef __FUNCT__
108 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
110 {
111   PetscInt           n = A->rmap->n;
112   PetscErrorCode     ierr;
113 
114   PetscFunctionBegin;
115   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
116   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
117   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
118     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
119     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
120     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
121     ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
122   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
123     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
124     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
125     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
126     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
127   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
128   (*B)->factortype = ftype;
129   PetscFunctionReturn(0);
130 }
131 EXTERN_C_END
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
135 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
136 {
137   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
138   IS                 isicol;
139   PetscErrorCode     ierr;
140   const PetscInt     *r,*ic;
141   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
142   PetscInt           *bi,*bj,*ajtmp;
143   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
144   PetscReal          f;
145   PetscInt           nlnk,*lnk,k,**bi_ptr;
146   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
147   PetscBT            lnkbt;
148 
149   PetscFunctionBegin;
150   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
151   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
152   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
153   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
154 
155   /* get new row pointers */
156   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
157   bi[0] = 0;
158 
159   /* bdiag is location of diagonal in factor */
160   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
161   bdiag[0] = 0;
162 
163   /* linked list for storing column indices of the active row */
164   nlnk = n + 1;
165   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
166 
167   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
168 
169   /* initial FreeSpace size is f*(ai[n]+1) */
170   f = info->fill;
171   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
172   current_space = free_space;
173 
174   for (i=0; i<n; i++) {
175     /* copy previous fill into linked list */
176     nzi = 0;
177     nnz = ai[r[i]+1] - ai[r[i]];
178     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
179     ajtmp = aj + ai[r[i]];
180     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
181     nzi += nlnk;
182 
183     /* add pivot rows into linked list */
184     row = lnk[n];
185     while (row < i) {
186       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
187       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
188       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
189       nzi += nlnk;
190       row  = lnk[row];
191     }
192     bi[i+1] = bi[i] + nzi;
193     im[i]   = nzi;
194 
195     /* mark bdiag */
196     nzbd = 0;
197     nnz  = nzi;
198     k    = lnk[n];
199     while (nnz-- && k < i){
200       nzbd++;
201       k = lnk[k];
202     }
203     bdiag[i] = bi[i] + nzbd;
204 
205     /* if free space is not available, make more free space */
206     if (current_space->local_remaining<nzi) {
207       nnz = (n - i)*nzi; /* estimated and max additional space needed */
208       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
209       reallocs++;
210     }
211 
212     /* copy data into free space, then initialize lnk */
213     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
214     bi_ptr[i] = current_space->array;
215     current_space->array           += nzi;
216     current_space->local_used      += nzi;
217     current_space->local_remaining -= nzi;
218   }
219 #if defined(PETSC_USE_INFO)
220   if (ai[n] != 0) {
221     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
222     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
223     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
224     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
225     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
226   } else {
227     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
228   }
229 #endif
230 
231   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
232   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
233 
234   /* destroy list of free space and other temporary array(s) */
235   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
236   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
237   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
238   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
239 
240   /* put together the new matrix */
241   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
242   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
243   b    = (Mat_SeqAIJ*)(B)->data;
244   b->free_a       = PETSC_TRUE;
245   b->free_ij      = PETSC_TRUE;
246   b->singlemalloc = PETSC_FALSE;
247   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
248   b->j          = bj;
249   b->i          = bi;
250   b->diag       = bdiag;
251   b->ilen       = 0;
252   b->imax       = 0;
253   b->row        = isrow;
254   b->col        = iscol;
255   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
256   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
257   b->icol       = isicol;
258   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
259 
260   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
261   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
262   b->maxnz = b->nz = bi[n] ;
263 
264   (B)->factortype            = MAT_FACTOR_LU;
265   (B)->info.factor_mallocs   = reallocs;
266   (B)->info.fill_ratio_given = f;
267 
268   if (ai[n]) {
269     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
270   } else {
271     (B)->info.fill_ratio_needed = 0.0;
272   }
273   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_inplace;
274   if (a->inode.size) {
275     (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
276   }
277   PetscFunctionReturn(0);
278 }
279 
280 #undef __FUNCT__
281 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
282 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
283 {
284   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
285   IS                 isicol;
286   PetscErrorCode     ierr;
287   const PetscInt     *r,*ic;
288   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
289   PetscInt           *bi,*bj,*ajtmp;
290   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
291   PetscReal          f;
292   PetscInt           nlnk,*lnk,k,**bi_ptr;
293   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
294   PetscBT            lnkbt;
295 
296   PetscFunctionBegin;
297   /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
298   /*
299   PetscBool          olddatastruct=PETSC_FALSE;
300   ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
301   if(olddatastruct){
302     ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr);
303     PetscFunctionReturn(0);
304   }
305   */
306   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
307   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
308   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
309   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
310 
311   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
312   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
313   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
314   bi[0] = bdiag[0] = 0;
315 
316   /* linked list for storing column indices of the active row */
317   nlnk = n + 1;
318   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
319 
320   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
321 
322   /* initial FreeSpace size is f*(ai[n]+1) */
323   f = info->fill;
324   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
325   current_space = free_space;
326 
327   for (i=0; i<n; i++) {
328     /* copy previous fill into linked list */
329     nzi = 0;
330     nnz = ai[r[i]+1] - ai[r[i]];
331     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
332     ajtmp = aj + ai[r[i]];
333     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
334     nzi += nlnk;
335 
336     /* add pivot rows into linked list */
337     row = lnk[n];
338     while (row < i){
339       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
340       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
341       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
342       nzi  += nlnk;
343       row   = lnk[row];
344     }
345     bi[i+1] = bi[i] + nzi;
346     im[i]   = nzi;
347 
348     /* mark bdiag */
349     nzbd = 0;
350     nnz  = nzi;
351     k    = lnk[n];
352     while (nnz-- && k < i){
353       nzbd++;
354       k = lnk[k];
355     }
356     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
357 
358     /* if free space is not available, make more free space */
359     if (current_space->local_remaining<nzi) {
360       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
361       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
362       reallocs++;
363     }
364 
365     /* copy data into free space, then initialize lnk */
366     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
367     bi_ptr[i] = current_space->array;
368     current_space->array           += nzi;
369     current_space->local_used      += nzi;
370     current_space->local_remaining -= nzi;
371   }
372 
373   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
374   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
375 
376   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
377   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
378   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
379   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
380   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
381 
382   /* put together the new matrix */
383   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
384   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
385   b    = (Mat_SeqAIJ*)(B)->data;
386   b->free_a       = PETSC_TRUE;
387   b->free_ij      = PETSC_TRUE;
388   b->singlemalloc = PETSC_FALSE;
389   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
390   b->j          = bj;
391   b->i          = bi;
392   b->diag       = bdiag;
393   b->ilen       = 0;
394   b->imax       = 0;
395   b->row        = isrow;
396   b->col        = iscol;
397   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
398   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
399   b->icol       = isicol;
400   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
401 
402   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
403   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
404   b->maxnz = b->nz = bdiag[0]+1;
405   B->factortype            = MAT_FACTOR_LU;
406   B->info.factor_mallocs   = reallocs;
407   B->info.fill_ratio_given = f;
408 
409   if (ai[n]) {
410     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
411   } else {
412     B->info.fill_ratio_needed = 0.0;
413   }
414 #if defined(PETSC_USE_INFO)
415   if (ai[n] != 0) {
416     PetscReal af = B->info.fill_ratio_needed;
417     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
418     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
419     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
420     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
421   } else {
422     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
423   }
424 #endif
425   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426   if (a->inode.size) {
427     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
428   }
429   PetscFunctionReturn(0);
430 }
431 
432 /*
433     Trouble in factorization, should we dump the original matrix?
434 */
435 #undef __FUNCT__
436 #define __FUNCT__ "MatFactorDumpMatrix"
437 PetscErrorCode MatFactorDumpMatrix(Mat A)
438 {
439   PetscErrorCode ierr;
440   PetscBool      flg = PETSC_FALSE;
441 
442   PetscFunctionBegin;
443   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
444   if (flg) {
445     PetscViewer viewer;
446     char        filename[PETSC_MAX_PATH_LEN];
447 
448     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
449     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
450     ierr = MatView(A,viewer);CHKERRQ(ierr);
451     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
452   }
453   PetscFunctionReturn(0);
454 }
455 
456 #undef __FUNCT__
457 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
458 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
459 {
460   Mat              C=B;
461   Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
462   IS               isrow = b->row,isicol = b->icol;
463   PetscErrorCode   ierr;
464   const PetscInt   *r,*ic,*ics;
465   const PetscInt   n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
466   PetscInt         i,j,k,nz,nzL,row,*pj;
467   const PetscInt   *ajtmp,*bjtmp;
468   MatScalar        *rtmp,*pc,multiplier,*pv;
469   const  MatScalar *aa=a->a,*v;
470   PetscBool        row_identity,col_identity;
471   FactorShiftCtx   sctx;
472   const PetscInt   *ddiag;
473   PetscReal        rs;
474   MatScalar        d;
475 
476   PetscFunctionBegin;
477   /* MatPivotSetUp(): initialize shift context sctx */
478   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
479 
480   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
481     ddiag          = a->diag;
482     sctx.shift_top = info->zeropivot;
483     for (i=0; i<n; i++) {
484       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
485       d  = (aa)[ddiag[i]];
486       rs = -PetscAbsScalar(d) - PetscRealPart(d);
487       v  = aa+ai[i];
488       nz = ai[i+1] - ai[i];
489       for (j=0; j<nz; j++)
490 	rs += PetscAbsScalar(v[j]);
491       if (rs>sctx.shift_top) sctx.shift_top = rs;
492     }
493     sctx.shift_top   *= 1.1;
494     sctx.nshift_max   = 5;
495     sctx.shift_lo     = 0.;
496     sctx.shift_hi     = 1.;
497   }
498 
499   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
500   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
501   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
502   ics  = ic;
503 
504   do {
505     sctx.newshift = PETSC_FALSE;
506     for (i=0; i<n; i++){
507       /* zero rtmp */
508       /* L part */
509       nz    = bi[i+1] - bi[i];
510       bjtmp = bj + bi[i];
511       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
512 
513       /* U part */
514       nz = bdiag[i]-bdiag[i+1];
515       bjtmp = bj + bdiag[i+1]+1;
516       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
517 
518       /* load in initial (unfactored row) */
519       nz    = ai[r[i]+1] - ai[r[i]];
520       ajtmp = aj + ai[r[i]];
521       v     = aa + ai[r[i]];
522       for (j=0; j<nz; j++) {
523         rtmp[ics[ajtmp[j]]] = v[j];
524       }
525       /* ZeropivotApply() */
526       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
527 
528       /* elimination */
529       bjtmp = bj + bi[i];
530       row   = *bjtmp++;
531       nzL   = bi[i+1] - bi[i];
532       for(k=0; k < nzL;k++) {
533         pc = rtmp + row;
534         if (*pc != 0.0) {
535           pv         = b->a + bdiag[row];
536           multiplier = *pc * (*pv);
537           *pc        = multiplier;
538           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
539 	  pv = b->a + bdiag[row+1]+1;
540 	  nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
541           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
542           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
543         }
544         row = *bjtmp++;
545       }
546 
547       /* finished row so stick it into b->a */
548       rs = 0.0;
549       /* L part */
550       pv   = b->a + bi[i] ;
551       pj   = b->j + bi[i] ;
552       nz   = bi[i+1] - bi[i];
553       for (j=0; j<nz; j++) {
554         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
555       }
556 
557       /* U part */
558       pv = b->a + bdiag[i+1]+1;
559       pj = b->j + bdiag[i+1]+1;
560       nz = bdiag[i] - bdiag[i+1]-1;
561       for (j=0; j<nz; j++) {
562         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
563       }
564 
565       sctx.rs  = rs;
566       sctx.pv  = rtmp[i];
567       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
568       if(sctx.newshift) break; /* break for-loop */
569       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
570 
571       /* Mark diagonal and invert diagonal for simplier triangular solves */
572       pv  = b->a + bdiag[i];
573       *pv = 1.0/rtmp[i];
574 
575     } /* endof for (i=0; i<n; i++){ */
576 
577     /* MatPivotRefine() */
578     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
579       /*
580        * if no shift in this attempt & shifting & started shifting & can refine,
581        * then try lower shift
582        */
583       sctx.shift_hi       = sctx.shift_fraction;
584       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
585       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
586       sctx.newshift       = PETSC_TRUE;
587       sctx.nshift++;
588     }
589   } while (sctx.newshift);
590 
591   ierr = PetscFree(rtmp);CHKERRQ(ierr);
592   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
593   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
594 
595   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
596   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
597   if (row_identity && col_identity) {
598     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
599   } else {
600     C->ops->solve = MatSolve_SeqAIJ;
601   }
602   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
603   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
604   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
605   C->ops->matsolve           = MatMatSolve_SeqAIJ;
606   C->assembled    = PETSC_TRUE;
607   C->preallocated = PETSC_TRUE;
608   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
609 
610   /* MatShiftView(A,info,&sctx) */
611   if (sctx.nshift){
612     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
613       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
614     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
615       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
616     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
617       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
618     }
619   }
620   ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
626 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
627 {
628   Mat             C=B;
629   Mat_SeqAIJ      *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
630   IS              isrow = b->row,isicol = b->icol;
631   PetscErrorCode  ierr;
632   const PetscInt   *r,*ic,*ics;
633   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
634   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
635   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
636   MatScalar       *pv,*rtmp,*pc,multiplier,d;
637   const MatScalar *v,*aa=a->a;
638   PetscReal       rs=0.0;
639   FactorShiftCtx  sctx;
640   const PetscInt  *ddiag;
641   PetscBool       row_identity, col_identity;
642 
643   PetscFunctionBegin;
644   /* MatPivotSetUp(): initialize shift context sctx */
645   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
646 
647   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
648     ddiag          = a->diag;
649     sctx.shift_top = info->zeropivot;
650     for (i=0; i<n; i++) {
651       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
652       d  = (aa)[ddiag[i]];
653       rs = -PetscAbsScalar(d) - PetscRealPart(d);
654       v  = aa+ai[i];
655       nz = ai[i+1] - ai[i];
656       for (j=0; j<nz; j++)
657 	rs += PetscAbsScalar(v[j]);
658       if (rs>sctx.shift_top) sctx.shift_top = rs;
659     }
660     sctx.shift_top   *= 1.1;
661     sctx.nshift_max   = 5;
662     sctx.shift_lo     = 0.;
663     sctx.shift_hi     = 1.;
664   }
665 
666   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
667   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
668   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
669   ics  = ic;
670 
671   do {
672     sctx.newshift = PETSC_FALSE;
673     for (i=0; i<n; i++){
674       nz    = bi[i+1] - bi[i];
675       bjtmp = bj + bi[i];
676       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
677 
678       /* load in initial (unfactored row) */
679       nz    = ai[r[i]+1] - ai[r[i]];
680       ajtmp = aj + ai[r[i]];
681       v     = aa + ai[r[i]];
682       for (j=0; j<nz; j++) {
683         rtmp[ics[ajtmp[j]]] = v[j];
684       }
685       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
686 
687       row = *bjtmp++;
688       while  (row < i) {
689         pc = rtmp + row;
690         if (*pc != 0.0) {
691           pv         = b->a + diag_offset[row];
692           pj         = b->j + diag_offset[row] + 1;
693           multiplier = *pc / *pv++;
694           *pc        = multiplier;
695           nz         = bi[row+1] - diag_offset[row] - 1;
696           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
697           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
698         }
699         row = *bjtmp++;
700       }
701       /* finished row so stick it into b->a */
702       pv   = b->a + bi[i] ;
703       pj   = b->j + bi[i] ;
704       nz   = bi[i+1] - bi[i];
705       diag = diag_offset[i] - bi[i];
706       rs   = 0.0;
707       for (j=0; j<nz; j++) {
708         pv[j] = rtmp[pj[j]];
709         rs   += PetscAbsScalar(pv[j]);
710       }
711       rs   -= PetscAbsScalar(pv[diag]);
712 
713       sctx.rs  = rs;
714       sctx.pv  = pv[diag];
715       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
716       if (sctx.newshift) break;
717       pv[diag] = sctx.pv;
718     }
719 
720     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
721       /*
722        * if no shift in this attempt & shifting & started shifting & can refine,
723        * then try lower shift
724        */
725       sctx.shift_hi       = sctx.shift_fraction;
726       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
727       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
728       sctx.newshift       = PETSC_TRUE;
729       sctx.nshift++;
730     }
731   } while (sctx.newshift);
732 
733   /* invert diagonal entries for simplier triangular solves */
734   for (i=0; i<n; i++) {
735     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
736   }
737   ierr = PetscFree(rtmp);CHKERRQ(ierr);
738   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
739   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
740 
741   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
742   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
743   if (row_identity && col_identity) {
744     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_inplace;
745   } else {
746     C->ops->solve   = MatSolve_SeqAIJ_inplace;
747   }
748   C->ops->solveadd           = MatSolveAdd_SeqAIJ_inplace;
749   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ_inplace;
750   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ_inplace;
751   C->ops->matsolve           = MatMatSolve_SeqAIJ_inplace;
752   C->assembled    = PETSC_TRUE;
753   C->preallocated = PETSC_TRUE;
754   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
755   if (sctx.nshift){
756      if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
757       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
758     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
759       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
760     }
761   }
762   (C)->ops->solve            = MatSolve_SeqAIJ_inplace;
763   (C)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ_inplace;
764   ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr);
765   PetscFunctionReturn(0);
766 }
767 
768 /*
769    This routine implements inplace ILU(0) with row or/and column permutations.
770    Input:
771      A - original matrix
772    Output;
773      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
774          a->j (col index) is permuted by the inverse of colperm, then sorted
775          a->a reordered accordingly with a->j
776          a->diag (ptr to diagonal elements) is updated.
777 */
778 #undef __FUNCT__
779 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
780 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
781 {
782   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
783   IS             isrow = a->row,isicol = a->icol;
784   PetscErrorCode ierr;
785   const PetscInt *r,*ic,*ics;
786   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
787   PetscInt       *ajtmp,nz,row;
788   PetscInt       *diag = a->diag,nbdiag,*pj;
789   PetscScalar    *rtmp,*pc,multiplier,d;
790   MatScalar      *pv,*v;
791   PetscReal      rs;
792   FactorShiftCtx sctx;
793   const  MatScalar *aa=a->a,*vtmp;
794 
795   PetscFunctionBegin;
796   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
797 
798   /* MatPivotSetUp(): initialize shift context sctx */
799   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
800 
801   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
802     const PetscInt   *ddiag = a->diag;
803     sctx.shift_top = info->zeropivot;
804     for (i=0; i<n; i++) {
805       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
806       d  = (aa)[ddiag[i]];
807       rs = -PetscAbsScalar(d) - PetscRealPart(d);
808       vtmp  = aa+ai[i];
809       nz = ai[i+1] - ai[i];
810       for (j=0; j<nz; j++)
811 	rs += PetscAbsScalar(vtmp[j]);
812       if (rs>sctx.shift_top) sctx.shift_top = rs;
813     }
814     sctx.shift_top   *= 1.1;
815     sctx.nshift_max   = 5;
816     sctx.shift_lo     = 0.;
817     sctx.shift_hi     = 1.;
818   }
819 
820   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
821   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
822   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
823   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
824   ics = ic;
825 
826 #if defined(MV)
827   sctx.shift_top      = 0.;
828   sctx.nshift_max     = 0;
829   sctx.shift_lo       = 0.;
830   sctx.shift_hi       = 0.;
831   sctx.shift_fraction = 0.;
832 
833   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
834     sctx.shift_top = 0.;
835     for (i=0; i<n; i++) {
836       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
837       d  = (a->a)[diag[i]];
838       rs = -PetscAbsScalar(d) - PetscRealPart(d);
839       v  = a->a+ai[i];
840       nz = ai[i+1] - ai[i];
841       for (j=0; j<nz; j++)
842 	rs += PetscAbsScalar(v[j]);
843       if (rs>sctx.shift_top) sctx.shift_top = rs;
844     }
845     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
846     sctx.shift_top    *= 1.1;
847     sctx.nshift_max   = 5;
848     sctx.shift_lo     = 0.;
849     sctx.shift_hi     = 1.;
850   }
851 
852   sctx.shift_amount = 0.;
853   sctx.nshift       = 0;
854 #endif
855 
856   do {
857     sctx.newshift = PETSC_FALSE;
858     for (i=0; i<n; i++){
859       /* load in initial unfactored row */
860       nz    = ai[r[i]+1] - ai[r[i]];
861       ajtmp = aj + ai[r[i]];
862       v     = a->a + ai[r[i]];
863       /* sort permuted ajtmp and values v accordingly */
864       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
865       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
866 
867       diag[r[i]] = ai[r[i]];
868       for (j=0; j<nz; j++) {
869         rtmp[ajtmp[j]] = v[j];
870         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
871       }
872       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
873 
874       row = *ajtmp++;
875       while  (row < i) {
876         pc = rtmp + row;
877         if (*pc != 0.0) {
878           pv         = a->a + diag[r[row]];
879           pj         = aj + diag[r[row]] + 1;
880 
881           multiplier = *pc / *pv++;
882           *pc        = multiplier;
883           nz         = ai[r[row]+1] - diag[r[row]] - 1;
884           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
885           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
886         }
887         row = *ajtmp++;
888       }
889       /* finished row so overwrite it onto a->a */
890       pv   = a->a + ai[r[i]] ;
891       pj   = aj + ai[r[i]] ;
892       nz   = ai[r[i]+1] - ai[r[i]];
893       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
894 
895       rs   = 0.0;
896       for (j=0; j<nz; j++) {
897         pv[j] = rtmp[pj[j]];
898         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
899       }
900 
901       sctx.rs  = rs;
902       sctx.pv  = pv[nbdiag];
903       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
904       if (sctx.newshift) break;
905       pv[nbdiag] = sctx.pv;
906     }
907 
908     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
909       /*
910        * if no shift in this attempt & shifting & started shifting & can refine,
911        * then try lower shift
912        */
913       sctx.shift_hi        = sctx.shift_fraction;
914       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
915       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
916       sctx.newshift         = PETSC_TRUE;
917       sctx.nshift++;
918     }
919   } while (sctx.newshift);
920 
921   /* invert diagonal entries for simplier triangular solves */
922   for (i=0; i<n; i++) {
923     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
924   }
925 
926   ierr = PetscFree(rtmp);CHKERRQ(ierr);
927   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
928   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
929   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
930   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
931   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
932   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
933   A->assembled = PETSC_TRUE;
934   A->preallocated = PETSC_TRUE;
935   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
936   if (sctx.nshift){
937     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
938       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
939     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
940       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
941     }
942   }
943   PetscFunctionReturn(0);
944 }
945 
946 /* ----------------------------------------------------------- */
947 #undef __FUNCT__
948 #define __FUNCT__ "MatLUFactor_SeqAIJ"
949 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
950 {
951   PetscErrorCode ierr;
952   Mat            C;
953 
954   PetscFunctionBegin;
955   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
956   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
957   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
958   A->ops->solve            = C->ops->solve;
959   A->ops->solvetranspose   = C->ops->solvetranspose;
960   ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
961   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 /* ----------------------------------------------------------- */
965 
966 
967 #undef __FUNCT__
968 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
969 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
970 {
971   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
972   IS                iscol = a->col,isrow = a->row;
973   PetscErrorCode    ierr;
974   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
975   PetscInt          nz;
976   const PetscInt    *rout,*cout,*r,*c;
977   PetscScalar       *x,*tmp,*tmps,sum;
978   const PetscScalar *b;
979   const MatScalar   *aa = a->a,*v;
980 
981   PetscFunctionBegin;
982   if (!n) PetscFunctionReturn(0);
983 
984   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
985   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
986   tmp  = a->solve_work;
987 
988   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
989   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
990 
991   /* forward solve the lower triangular */
992   tmp[0] = b[*r++];
993   tmps   = tmp;
994   for (i=1; i<n; i++) {
995     v   = aa + ai[i] ;
996     vi  = aj + ai[i] ;
997     nz  = a->diag[i] - ai[i];
998     sum = b[*r++];
999     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1000     tmp[i] = sum;
1001   }
1002 
1003   /* backward solve the upper triangular */
1004   for (i=n-1; i>=0; i--){
1005     v   = aa + a->diag[i] + 1;
1006     vi  = aj + a->diag[i] + 1;
1007     nz  = ai[i+1] - a->diag[i] - 1;
1008     sum = tmp[i];
1009     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1010     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1011   }
1012 
1013   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1014   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1015   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1016   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1017   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1023 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1024 {
1025   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1026   IS              iscol = a->col,isrow = a->row;
1027   PetscErrorCode  ierr;
1028   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1029   PetscInt        nz,neq;
1030   const PetscInt  *rout,*cout,*r,*c;
1031   PetscScalar     *x,*b,*tmp,*tmps,sum;
1032   const MatScalar *aa = a->a,*v;
1033   PetscBool       bisdense,xisdense;
1034 
1035   PetscFunctionBegin;
1036   if (!n) PetscFunctionReturn(0);
1037 
1038   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1039   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1040   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1041   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1042 
1043   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1044   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1045 
1046   tmp  = a->solve_work;
1047   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1048   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1049 
1050   for (neq=0; neq<B->cmap->n; neq++){
1051     /* forward solve the lower triangular */
1052     tmp[0] = b[r[0]];
1053     tmps   = tmp;
1054     for (i=1; i<n; i++) {
1055       v   = aa + ai[i] ;
1056       vi  = aj + ai[i] ;
1057       nz  = a->diag[i] - ai[i];
1058       sum = b[r[i]];
1059       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1060       tmp[i] = sum;
1061     }
1062     /* backward solve the upper triangular */
1063     for (i=n-1; i>=0; i--){
1064       v   = aa + a->diag[i] + 1;
1065       vi  = aj + a->diag[i] + 1;
1066       nz  = ai[i+1] - a->diag[i] - 1;
1067       sum = tmp[i];
1068       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1069       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1070     }
1071 
1072     b += n;
1073     x += n;
1074   }
1075   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1076   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1077   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1078   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1079   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1085 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1086 {
1087   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1088   IS              iscol = a->col,isrow = a->row;
1089   PetscErrorCode  ierr;
1090   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1091   PetscInt        nz,neq;
1092   const PetscInt  *rout,*cout,*r,*c;
1093   PetscScalar     *x,*b,*tmp,sum;
1094   const MatScalar *aa = a->a,*v;
1095   PetscBool       bisdense,xisdense;
1096 
1097   PetscFunctionBegin;
1098   if (!n) PetscFunctionReturn(0);
1099 
1100   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1101   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1102   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1103   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1104 
1105   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1106   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1107 
1108   tmp  = a->solve_work;
1109   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1110   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1111 
1112   for (neq=0; neq<B->cmap->n; neq++){
1113     /* forward solve the lower triangular */
1114     tmp[0] = b[r[0]];
1115     v      = aa;
1116     vi     = aj;
1117     for (i=1; i<n; i++) {
1118       nz  = ai[i+1] - ai[i];
1119       sum = b[r[i]];
1120       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1121       tmp[i] = sum;
1122       v += nz; vi += nz;
1123     }
1124 
1125     /* backward solve the upper triangular */
1126     for (i=n-1; i>=0; i--){
1127       v   = aa + adiag[i+1]+1;
1128       vi  = aj + adiag[i+1]+1;
1129       nz  = adiag[i]-adiag[i+1]-1;
1130       sum = tmp[i];
1131       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1132       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1133     }
1134 
1135     b += n;
1136     x += n;
1137   }
1138   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1139   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1140   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1141   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1142   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1148 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1149 {
1150   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1151   IS              iscol = a->col,isrow = a->row;
1152   PetscErrorCode  ierr;
1153   const PetscInt  *r,*c,*rout,*cout;
1154   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1155   PetscInt        nz,row;
1156   PetscScalar     *x,*b,*tmp,*tmps,sum;
1157   const MatScalar *aa = a->a,*v;
1158 
1159   PetscFunctionBegin;
1160   if (!n) PetscFunctionReturn(0);
1161 
1162   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1163   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1164   tmp  = a->solve_work;
1165 
1166   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1167   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1168 
1169   /* forward solve the lower triangular */
1170   tmp[0] = b[*r++];
1171   tmps   = tmp;
1172   for (row=1; row<n; row++) {
1173     i   = rout[row]; /* permuted row */
1174     v   = aa + ai[i] ;
1175     vi  = aj + ai[i] ;
1176     nz  = a->diag[i] - ai[i];
1177     sum = b[*r++];
1178     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1179     tmp[row] = sum;
1180   }
1181 
1182   /* backward solve the upper triangular */
1183   for (row=n-1; row>=0; row--){
1184     i   = rout[row]; /* permuted row */
1185     v   = aa + a->diag[i] + 1;
1186     vi  = aj + a->diag[i] + 1;
1187     nz  = ai[i+1] - a->diag[i] - 1;
1188     sum = tmp[row];
1189     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1190     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1191   }
1192 
1193   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1194   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1195   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1196   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1197   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 /* ----------------------------------------------------------- */
1202 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1203 #undef __FUNCT__
1204 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1205 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1206 {
1207   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1208   PetscErrorCode    ierr;
1209   PetscInt          n = A->rmap->n;
1210   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1211   PetscScalar       *x;
1212   const PetscScalar *b;
1213   const MatScalar   *aa = a->a;
1214 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1215   PetscInt          adiag_i,i,nz,ai_i;
1216   const PetscInt    *vi;
1217   const MatScalar   *v;
1218   PetscScalar       sum;
1219 #endif
1220 
1221   PetscFunctionBegin;
1222   if (!n) PetscFunctionReturn(0);
1223 
1224   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1225   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1226 
1227 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1228   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1229 #else
1230   /* forward solve the lower triangular */
1231   x[0] = b[0];
1232   for (i=1; i<n; i++) {
1233     ai_i = ai[i];
1234     v    = aa + ai_i;
1235     vi   = aj + ai_i;
1236     nz   = adiag[i] - ai_i;
1237     sum  = b[i];
1238     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1239     x[i] = sum;
1240   }
1241 
1242   /* backward solve the upper triangular */
1243   for (i=n-1; i>=0; i--){
1244     adiag_i = adiag[i];
1245     v       = aa + adiag_i + 1;
1246     vi      = aj + adiag_i + 1;
1247     nz      = ai[i+1] - adiag_i - 1;
1248     sum     = x[i];
1249     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1250     x[i]    = sum*aa[adiag_i];
1251   }
1252 #endif
1253   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1254   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1255   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1261 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1262 {
1263   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1264   IS                iscol = a->col,isrow = a->row;
1265   PetscErrorCode    ierr;
1266   PetscInt          i, n = A->rmap->n,j;
1267   PetscInt          nz;
1268   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1269   PetscScalar       *x,*tmp,sum;
1270   const PetscScalar *b;
1271   const MatScalar   *aa = a->a,*v;
1272 
1273   PetscFunctionBegin;
1274   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1275 
1276   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1277   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1278   tmp  = a->solve_work;
1279 
1280   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1281   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1282 
1283   /* forward solve the lower triangular */
1284   tmp[0] = b[*r++];
1285   for (i=1; i<n; i++) {
1286     v   = aa + ai[i] ;
1287     vi  = aj + ai[i] ;
1288     nz  = a->diag[i] - ai[i];
1289     sum = b[*r++];
1290     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1291     tmp[i] = sum;
1292   }
1293 
1294   /* backward solve the upper triangular */
1295   for (i=n-1; i>=0; i--){
1296     v   = aa + a->diag[i] + 1;
1297     vi  = aj + a->diag[i] + 1;
1298     nz  = ai[i+1] - a->diag[i] - 1;
1299     sum = tmp[i];
1300     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1301     tmp[i] = sum*aa[a->diag[i]];
1302     x[*c--] += tmp[i];
1303   }
1304 
1305   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1306   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1307   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1308   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1309   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1310 
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 #undef __FUNCT__
1315 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1316 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1317 {
1318   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1319   IS                iscol = a->col,isrow = a->row;
1320   PetscErrorCode    ierr;
1321   PetscInt          i, n = A->rmap->n,j;
1322   PetscInt          nz;
1323   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1324   PetscScalar       *x,*tmp,sum;
1325   const PetscScalar *b;
1326   const MatScalar   *aa = a->a,*v;
1327 
1328   PetscFunctionBegin;
1329   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1330 
1331   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1332   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1333   tmp  = a->solve_work;
1334 
1335   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1336   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1337 
1338   /* forward solve the lower triangular */
1339   tmp[0] = b[r[0]];
1340   v      = aa;
1341   vi     = aj;
1342   for (i=1; i<n; i++) {
1343     nz  = ai[i+1] - ai[i];
1344     sum = b[r[i]];
1345     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1346     tmp[i] = sum;
1347     v += nz; vi += nz;
1348   }
1349 
1350   /* backward solve the upper triangular */
1351   v  = aa + adiag[n-1];
1352   vi = aj + adiag[n-1];
1353   for (i=n-1; i>=0; i--){
1354     nz  = adiag[i] - adiag[i+1] - 1;
1355     sum = tmp[i];
1356     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1357     tmp[i] = sum*v[nz];
1358     x[c[i]] += tmp[i];
1359     v += nz+1; vi += nz+1;
1360   }
1361 
1362   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1363   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1364   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1365   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1366   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1367 
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1373 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1374 {
1375   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1376   IS                iscol = a->col,isrow = a->row;
1377   PetscErrorCode    ierr;
1378   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1379   PetscInt          i,n = A->rmap->n,j;
1380   PetscInt          nz;
1381   PetscScalar       *x,*tmp,s1;
1382   const MatScalar   *aa = a->a,*v;
1383   const PetscScalar *b;
1384 
1385   PetscFunctionBegin;
1386   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1387   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1388   tmp  = a->solve_work;
1389 
1390   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1391   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1392 
1393   /* copy the b into temp work space according to permutation */
1394   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1395 
1396   /* forward solve the U^T */
1397   for (i=0; i<n; i++) {
1398     v   = aa + diag[i] ;
1399     vi  = aj + diag[i] + 1;
1400     nz  = ai[i+1] - diag[i] - 1;
1401     s1  = tmp[i];
1402     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1403     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1404     tmp[i] = s1;
1405   }
1406 
1407   /* backward solve the L^T */
1408   for (i=n-1; i>=0; i--){
1409     v   = aa + diag[i] - 1 ;
1410     vi  = aj + diag[i] - 1 ;
1411     nz  = diag[i] - ai[i];
1412     s1  = tmp[i];
1413     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1414   }
1415 
1416   /* copy tmp into x according to permutation */
1417   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1418 
1419   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1420   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1421   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1422   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1423 
1424   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1430 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1431 {
1432   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1433   IS                iscol = a->col,isrow = a->row;
1434   PetscErrorCode    ierr;
1435   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1436   PetscInt          i,n = A->rmap->n,j;
1437   PetscInt          nz;
1438   PetscScalar       *x,*tmp,s1;
1439   const MatScalar   *aa = a->a,*v;
1440   const PetscScalar *b;
1441 
1442   PetscFunctionBegin;
1443   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1444   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1445   tmp  = a->solve_work;
1446 
1447   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1448   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1449 
1450   /* copy the b into temp work space according to permutation */
1451   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1452 
1453   /* forward solve the U^T */
1454   for (i=0; i<n; i++) {
1455     v   = aa + adiag[i+1] + 1;
1456     vi  = aj + adiag[i+1] + 1;
1457     nz  = adiag[i] - adiag[i+1] - 1;
1458     s1  = tmp[i];
1459     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1460     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1461     tmp[i] = s1;
1462   }
1463 
1464   /* backward solve the L^T */
1465   for (i=n-1; i>=0; i--){
1466     v   = aa + ai[i];
1467     vi  = aj + ai[i];
1468     nz  = ai[i+1] - ai[i];
1469     s1  = tmp[i];
1470     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1471   }
1472 
1473   /* copy tmp into x according to permutation */
1474   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1475 
1476   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1477   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1478   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1479   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1480 
1481   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1487 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1488 {
1489   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1490   IS                iscol = a->col,isrow = a->row;
1491   PetscErrorCode    ierr;
1492   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1493   PetscInt          i,n = A->rmap->n,j;
1494   PetscInt          nz;
1495   PetscScalar       *x,*tmp,s1;
1496   const MatScalar   *aa = a->a,*v;
1497   const PetscScalar *b;
1498 
1499   PetscFunctionBegin;
1500   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1501   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1502   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1503   tmp  = a->solve_work;
1504 
1505   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1506   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1507 
1508   /* copy the b into temp work space according to permutation */
1509   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1510 
1511   /* forward solve the U^T */
1512   for (i=0; i<n; i++) {
1513     v   = aa + diag[i] ;
1514     vi  = aj + diag[i] + 1;
1515     nz  = ai[i+1] - diag[i] - 1;
1516     s1  = tmp[i];
1517     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1518     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1519     tmp[i] = s1;
1520   }
1521 
1522   /* backward solve the L^T */
1523   for (i=n-1; i>=0; i--){
1524     v   = aa + diag[i] - 1 ;
1525     vi  = aj + diag[i] - 1 ;
1526     nz  = diag[i] - ai[i];
1527     s1  = tmp[i];
1528     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1529   }
1530 
1531   /* copy tmp into x according to permutation */
1532   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1533 
1534   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1535   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1536   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1537   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1538 
1539   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1545 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1546 {
1547   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1548   IS                iscol = a->col,isrow = a->row;
1549   PetscErrorCode    ierr;
1550   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1551   PetscInt          i,n = A->rmap->n,j;
1552   PetscInt          nz;
1553   PetscScalar       *x,*tmp,s1;
1554   const MatScalar   *aa = a->a,*v;
1555   const PetscScalar *b;
1556 
1557   PetscFunctionBegin;
1558   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1559   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1560   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1561   tmp  = a->solve_work;
1562 
1563   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1564   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1565 
1566   /* copy the b into temp work space according to permutation */
1567   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1568 
1569   /* forward solve the U^T */
1570   for (i=0; i<n; i++) {
1571     v   = aa + adiag[i+1] + 1;
1572     vi  = aj + adiag[i+1] + 1;
1573     nz  = adiag[i] - adiag[i+1] - 1;
1574     s1  = tmp[i];
1575     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1576     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1577     tmp[i] = s1;
1578   }
1579 
1580 
1581   /* backward solve the L^T */
1582   for (i=n-1; i>=0; i--){
1583     v   = aa + ai[i] ;
1584     vi  = aj + ai[i];
1585     nz  = ai[i+1] - ai[i];
1586     s1  = tmp[i];
1587     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1588   }
1589 
1590   /* copy tmp into x according to permutation */
1591   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1592 
1593   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1594   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1595   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1596   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1597 
1598   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 /* ----------------------------------------------------------------*/
1603 
1604 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool );
1605 
1606 /*
1607    ilu() under revised new data structure.
1608    Factored arrays bj and ba are stored as
1609      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1610 
1611    bi=fact->i is an array of size n+1, in which
1612    bi+
1613      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1614      bi[n]:  points to L(n-1,n-1)+1
1615 
1616   bdiag=fact->diag is an array of size n+1,in which
1617      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1618      bdiag[n]: points to entry of U(n-1,0)-1
1619 
1620    U(i,:) contains bdiag[i] as its last entry, i.e.,
1621     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1622 */
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1625 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1626 {
1627 
1628   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1629   PetscErrorCode     ierr;
1630   const PetscInt     n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1631   PetscInt           i,j,k=0,nz,*bi,*bj,*bdiag;
1632   PetscBool          missing;
1633   IS                 isicol;
1634 
1635   PetscFunctionBegin;
1636   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1637   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1638   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1639   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1640   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1641   b    = (Mat_SeqAIJ*)(fact)->data;
1642 
1643   /* allocate matrix arrays for new data structure */
1644   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr);
1645   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1646   b->singlemalloc = PETSC_TRUE;
1647   if (!b->diag){
1648     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1649     ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1650   }
1651   bdiag = b->diag;
1652 
1653   if (n > 0) {
1654     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1655   }
1656 
1657   /* set bi and bj with new data structure */
1658   bi = b->i;
1659   bj = b->j;
1660 
1661   /* L part */
1662   bi[0] = 0;
1663   for (i=0; i<n; i++){
1664     nz = adiag[i] - ai[i];
1665     bi[i+1] = bi[i] + nz;
1666     aj = a->j + ai[i];
1667     for (j=0; j<nz; j++){
1668       /*   *bj = aj[j]; bj++; */
1669       bj[k++] = aj[j];
1670     }
1671   }
1672 
1673   /* U part */
1674   bdiag[n] = bi[n]-1;
1675   for (i=n-1; i>=0; i--){
1676     nz = ai[i+1] - adiag[i] - 1;
1677     aj = a->j + adiag[i] + 1;
1678     for (j=0; j<nz; j++){
1679       /*      *bj = aj[j]; bj++; */
1680       bj[k++] = aj[j];
1681     }
1682     /* diag[i] */
1683     /*    *bj = i; bj++; */
1684     bj[k++] = i;
1685     bdiag[i] = bdiag[i+1] + nz + 1;
1686   }
1687 
1688   fact->factortype             = MAT_FACTOR_ILU;
1689   fact->info.factor_mallocs    = 0;
1690   fact->info.fill_ratio_given  = info->fill;
1691   fact->info.fill_ratio_needed = 1.0;
1692   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1693 
1694   b       = (Mat_SeqAIJ*)(fact)->data;
1695   b->row  = isrow;
1696   b->col  = iscol;
1697   b->icol = isicol;
1698   ierr    = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1699   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1700   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1706 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1707 {
1708   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1709   IS                 isicol;
1710   PetscErrorCode     ierr;
1711   const PetscInt     *r,*ic;
1712   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1713   PetscInt           *bi,*cols,nnz,*cols_lvl;
1714   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1715   PetscInt           i,levels,diagonal_fill;
1716   PetscBool          col_identity,row_identity;
1717   PetscReal          f;
1718   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1719   PetscBT            lnkbt;
1720   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1721   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1722   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1723 
1724   PetscFunctionBegin;
1725   /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1726   /*
1727   PetscBool          olddatastruct=PETSC_FALSE;
1728   ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
1729   if(olddatastruct){
1730     ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1731     PetscFunctionReturn(0);
1732   }
1733   */
1734 
1735   levels = (PetscInt)info->levels;
1736   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1737   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1738   if (!levels && row_identity && col_identity) {
1739     /* special case: ilu(0) with natural ordering */
1740     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1741     if (a->inode.size) {
1742       fact->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1743     }
1744     PetscFunctionReturn(0);
1745   }
1746 
1747   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1748   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1749   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1750   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1751 
1752   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1753   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1754   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1755   bi[0] = bdiag[0] = 0;
1756   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1757 
1758   /* create a linked list for storing column indices of the active row */
1759   nlnk = n + 1;
1760   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1761 
1762   /* initial FreeSpace size is f*(ai[n]+1) */
1763   f             = info->fill;
1764   diagonal_fill = (PetscInt)info->diagonal_fill;
1765   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1766   current_space = free_space;
1767   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1768   current_space_lvl = free_space_lvl;
1769   for (i=0; i<n; i++) {
1770     nzi = 0;
1771     /* copy current row into linked list */
1772     nnz  = ai[r[i]+1] - ai[r[i]];
1773     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1774     cols = aj + ai[r[i]];
1775     lnk[i] = -1; /* marker to indicate if diagonal exists */
1776     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1777     nzi += nlnk;
1778 
1779     /* make sure diagonal entry is included */
1780     if (diagonal_fill && lnk[i] == -1) {
1781       fm = n;
1782       while (lnk[fm] < i) fm = lnk[fm];
1783       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1784       lnk[fm]    = i;
1785       lnk_lvl[i] = 0;
1786       nzi++; dcount++;
1787     }
1788 
1789     /* add pivot rows into the active row */
1790     nzbd = 0;
1791     prow = lnk[n];
1792     while (prow < i) {
1793       nnz      = bdiag[prow];
1794       cols     = bj_ptr[prow] + nnz + 1;
1795       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1796       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1797       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1798       nzi += nlnk;
1799       prow = lnk[prow];
1800       nzbd++;
1801     }
1802     bdiag[i] = nzbd;
1803     bi[i+1]  = bi[i] + nzi;
1804     /* if free space is not available, make more free space */
1805     if (current_space->local_remaining<nzi) {
1806       nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1807       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1808       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1809       reallocs++;
1810     }
1811 
1812     /* copy data into free_space and free_space_lvl, then initialize lnk */
1813     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1814     bj_ptr[i]    = current_space->array;
1815     bjlvl_ptr[i] = current_space_lvl->array;
1816 
1817     /* make sure the active row i has diagonal entry */
1818     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1819 
1820     current_space->array           += nzi;
1821     current_space->local_used      += nzi;
1822     current_space->local_remaining -= nzi;
1823     current_space_lvl->array           += nzi;
1824     current_space_lvl->local_used      += nzi;
1825     current_space_lvl->local_remaining -= nzi;
1826   }
1827 
1828   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1829   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1830   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1831   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1832   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1833 
1834   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1835   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1836   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1837 
1838 #if defined(PETSC_USE_INFO)
1839   {
1840     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1841     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1842     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1843     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1844     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1845     if (diagonal_fill) {
1846       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1847     }
1848   }
1849 #endif
1850   /* put together the new matrix */
1851   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1852   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1853   b = (Mat_SeqAIJ*)(fact)->data;
1854   b->free_a       = PETSC_TRUE;
1855   b->free_ij      = PETSC_TRUE;
1856   b->singlemalloc = PETSC_FALSE;
1857   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1858   b->j          = bj;
1859   b->i          = bi;
1860   b->diag       = bdiag;
1861   b->ilen       = 0;
1862   b->imax       = 0;
1863   b->row        = isrow;
1864   b->col        = iscol;
1865   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1866   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1867   b->icol       = isicol;
1868   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1869   /* In b structure:  Free imax, ilen, old a, old j.
1870      Allocate bdiag, solve_work, new a, new j */
1871   ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1872   b->maxnz = b->nz = bdiag[0]+1;
1873   (fact)->info.factor_mallocs    = reallocs;
1874   (fact)->info.fill_ratio_given  = f;
1875   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1876   (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1877   if (a->inode.size) {
1878     (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1879   }
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1885 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1886 {
1887   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1888   IS                 isicol;
1889   PetscErrorCode     ierr;
1890   const PetscInt     *r,*ic;
1891   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1892   PetscInt           *bi,*cols,nnz,*cols_lvl;
1893   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1894   PetscInt           i,levels,diagonal_fill;
1895   PetscBool          col_identity,row_identity;
1896   PetscReal          f;
1897   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1898   PetscBT            lnkbt;
1899   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1900   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1901   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1902   PetscBool          missing;
1903 
1904   PetscFunctionBegin;
1905   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1906   f             = info->fill;
1907   levels        = (PetscInt)info->levels;
1908   diagonal_fill = (PetscInt)info->diagonal_fill;
1909   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1910 
1911   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1912   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1913   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1914     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1915     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1916     if (a->inode.size) {
1917       (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1918     }
1919     fact->factortype = MAT_FACTOR_ILU;
1920     (fact)->info.factor_mallocs    = 0;
1921     (fact)->info.fill_ratio_given  = info->fill;
1922     (fact)->info.fill_ratio_needed = 1.0;
1923     b               = (Mat_SeqAIJ*)(fact)->data;
1924     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1925     if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1926     b->row              = isrow;
1927     b->col              = iscol;
1928     b->icol             = isicol;
1929     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1930     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1931     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1932     PetscFunctionReturn(0);
1933   }
1934 
1935   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1936   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1937 
1938   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1939   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1940   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1941   bi[0] = bdiag[0] = 0;
1942 
1943   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1944 
1945   /* create a linked list for storing column indices of the active row */
1946   nlnk = n + 1;
1947   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1948 
1949   /* initial FreeSpace size is f*(ai[n]+1) */
1950   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1951   current_space = free_space;
1952   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1953   current_space_lvl = free_space_lvl;
1954 
1955   for (i=0; i<n; i++) {
1956     nzi = 0;
1957     /* copy current row into linked list */
1958     nnz  = ai[r[i]+1] - ai[r[i]];
1959     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1960     cols = aj + ai[r[i]];
1961     lnk[i] = -1; /* marker to indicate if diagonal exists */
1962     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1963     nzi += nlnk;
1964 
1965     /* make sure diagonal entry is included */
1966     if (diagonal_fill && lnk[i] == -1) {
1967       fm = n;
1968       while (lnk[fm] < i) fm = lnk[fm];
1969       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1970       lnk[fm]    = i;
1971       lnk_lvl[i] = 0;
1972       nzi++; dcount++;
1973     }
1974 
1975     /* add pivot rows into the active row */
1976     nzbd = 0;
1977     prow = lnk[n];
1978     while (prow < i) {
1979       nnz      = bdiag[prow];
1980       cols     = bj_ptr[prow] + nnz + 1;
1981       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1982       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1983       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1984       nzi += nlnk;
1985       prow = lnk[prow];
1986       nzbd++;
1987     }
1988     bdiag[i] = nzbd;
1989     bi[i+1]  = bi[i] + nzi;
1990 
1991     /* if free space is not available, make more free space */
1992     if (current_space->local_remaining<nzi) {
1993       nnz = nzi*(n - i); /* estimated and max additional space needed */
1994       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1995       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1996       reallocs++;
1997     }
1998 
1999     /* copy data into free_space and free_space_lvl, then initialize lnk */
2000     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2001     bj_ptr[i]    = current_space->array;
2002     bjlvl_ptr[i] = current_space_lvl->array;
2003 
2004     /* make sure the active row i has diagonal entry */
2005     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2006 
2007     current_space->array           += nzi;
2008     current_space->local_used      += nzi;
2009     current_space->local_remaining -= nzi;
2010     current_space_lvl->array           += nzi;
2011     current_space_lvl->local_used      += nzi;
2012     current_space_lvl->local_remaining -= nzi;
2013   }
2014 
2015   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2016   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2017 
2018   /* destroy list of free space and other temporary arrays */
2019   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2020   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2021   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2022   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2023   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2024 
2025 #if defined(PETSC_USE_INFO)
2026   {
2027     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2028     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2029     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2030     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2031     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2032     if (diagonal_fill) {
2033       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2034     }
2035   }
2036 #endif
2037 
2038   /* put together the new matrix */
2039   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2040   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2041   b = (Mat_SeqAIJ*)(fact)->data;
2042   b->free_a       = PETSC_TRUE;
2043   b->free_ij      = PETSC_TRUE;
2044   b->singlemalloc = PETSC_FALSE;
2045   ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2046   b->j          = bj;
2047   b->i          = bi;
2048   for (i=0; i<n; i++) bdiag[i] += bi[i];
2049   b->diag       = bdiag;
2050   b->ilen       = 0;
2051   b->imax       = 0;
2052   b->row        = isrow;
2053   b->col        = iscol;
2054   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2055   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2056   b->icol       = isicol;
2057   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2058   /* In b structure:  Free imax, ilen, old a, old j.
2059      Allocate bdiag, solve_work, new a, new j */
2060   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2061   b->maxnz             = b->nz = bi[n] ;
2062   (fact)->info.factor_mallocs    = reallocs;
2063   (fact)->info.fill_ratio_given  = f;
2064   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2065   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
2066   if (a->inode.size) {
2067     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2068   }
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2074 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2075 {
2076   Mat            C = B;
2077   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2078   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2079   IS             ip=b->row,iip = b->icol;
2080   PetscErrorCode ierr;
2081   const PetscInt *rip,*riip;
2082   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2083   PetscInt       *ai=a->i,*aj=a->j;
2084   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2085   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2086   PetscBool      perm_identity;
2087   FactorShiftCtx sctx;
2088   PetscReal      rs;
2089   MatScalar      d,*v;
2090 
2091   PetscFunctionBegin;
2092   /* MatPivotSetUp(): initialize shift context sctx */
2093   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2094 
2095   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2096     sctx.shift_top = info->zeropivot;
2097     for (i=0; i<mbs; i++) {
2098       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2099       d  = (aa)[a->diag[i]];
2100       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2101       v  = aa+ai[i];
2102       nz = ai[i+1] - ai[i];
2103       for (j=0; j<nz; j++)
2104 	rs += PetscAbsScalar(v[j]);
2105       if (rs>sctx.shift_top) sctx.shift_top = rs;
2106     }
2107     sctx.shift_top   *= 1.1;
2108     sctx.nshift_max   = 5;
2109     sctx.shift_lo     = 0.;
2110     sctx.shift_hi     = 1.;
2111   }
2112 
2113   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2114   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2115 
2116   /* allocate working arrays
2117      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2118      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2119   */
2120   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr);
2121 
2122   do {
2123     sctx.newshift = PETSC_FALSE;
2124 
2125     for (i=0; i<mbs; i++) c2r[i] = mbs;
2126     il[0] = 0;
2127 
2128     for (k = 0; k<mbs; k++){
2129       /* zero rtmp */
2130       nz = bi[k+1] - bi[k];
2131       bjtmp = bj + bi[k];
2132       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2133 
2134       /* load in initial unfactored row */
2135       bval = ba + bi[k];
2136       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2137       for (j = jmin; j < jmax; j++){
2138         col = riip[aj[j]];
2139         if (col >= k){ /* only take upper triangular entry */
2140           rtmp[col] = aa[j];
2141           *bval++   = 0.0; /* for in-place factorization */
2142         }
2143       }
2144       /* shift the diagonal of the matrix: ZeropivotApply() */
2145       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2146 
2147       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2148       dk = rtmp[k];
2149       i  = c2r[k]; /* first row to be added to k_th row  */
2150 
2151       while (i < k){
2152         nexti = c2r[i]; /* next row to be added to k_th row */
2153 
2154         /* compute multiplier, update diag(k) and U(i,k) */
2155         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2156         uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
2157         dk   += uikdi*ba[ili]; /* update diag[k] */
2158         ba[ili] = uikdi; /* -U(i,k) */
2159 
2160         /* add multiple of row i to k-th row */
2161         jmin = ili + 1; jmax = bi[i+1];
2162         if (jmin < jmax){
2163           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2164           /* update il and c2r for row i */
2165           il[i] = jmin;
2166           j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2167         }
2168         i = nexti;
2169       }
2170 
2171       /* copy data into U(k,:) */
2172       rs   = 0.0;
2173       jmin = bi[k]; jmax = bi[k+1]-1;
2174       if (jmin < jmax) {
2175         for (j=jmin; j<jmax; j++){
2176           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2177         }
2178         /* add the k-th row into il and c2r */
2179         il[k] = jmin;
2180         i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2181       }
2182 
2183       /* MatPivotCheck() */
2184       sctx.rs  = rs;
2185       sctx.pv  = dk;
2186       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
2187       if(sctx.newshift) break;
2188       dk = sctx.pv;
2189 
2190       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2191     }
2192   } while (sctx.newshift);
2193 
2194   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2195   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2196   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2197 
2198   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2199   if (perm_identity){
2200     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2201     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2202     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2203     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2204   } else {
2205     B->ops->solve           = MatSolve_SeqSBAIJ_1;
2206     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
2207     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
2208     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
2209   }
2210 
2211   C->assembled    = PETSC_TRUE;
2212   C->preallocated = PETSC_TRUE;
2213   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2214 
2215   /* MatPivotView() */
2216   if (sctx.nshift){
2217     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2218       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
2219     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2220       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2221     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2222       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
2223     }
2224   }
2225   PetscFunctionReturn(0);
2226 }
2227 
2228 #undef __FUNCT__
2229 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2230 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2231 {
2232   Mat            C = B;
2233   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2234   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2235   IS             ip=b->row,iip = b->icol;
2236   PetscErrorCode ierr;
2237   const PetscInt *rip,*riip;
2238   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2239   PetscInt       *ai=a->i,*aj=a->j;
2240   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2241   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2242   PetscBool      perm_identity;
2243   FactorShiftCtx sctx;
2244   PetscReal      rs;
2245   MatScalar      d,*v;
2246 
2247   PetscFunctionBegin;
2248   /* MatPivotSetUp(): initialize shift context sctx */
2249   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2250 
2251   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2252     sctx.shift_top = info->zeropivot;
2253     for (i=0; i<mbs; i++) {
2254       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2255       d  = (aa)[a->diag[i]];
2256       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2257       v  = aa+ai[i];
2258       nz = ai[i+1] - ai[i];
2259       for (j=0; j<nz; j++)
2260 	rs += PetscAbsScalar(v[j]);
2261       if (rs>sctx.shift_top) sctx.shift_top = rs;
2262     }
2263     sctx.shift_top   *= 1.1;
2264     sctx.nshift_max   = 5;
2265     sctx.shift_lo     = 0.;
2266     sctx.shift_hi     = 1.;
2267   }
2268 
2269   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2270   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2271 
2272   /* initialization */
2273   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
2274 
2275   do {
2276     sctx.newshift = PETSC_FALSE;
2277 
2278     for (i=0; i<mbs; i++) jl[i] = mbs;
2279     il[0] = 0;
2280 
2281     for (k = 0; k<mbs; k++){
2282       /* zero rtmp */
2283       nz = bi[k+1] - bi[k];
2284       bjtmp = bj + bi[k];
2285       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2286 
2287       bval = ba + bi[k];
2288       /* initialize k-th row by the perm[k]-th row of A */
2289       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2290       for (j = jmin; j < jmax; j++){
2291         col = riip[aj[j]];
2292         if (col >= k){ /* only take upper triangular entry */
2293           rtmp[col] = aa[j];
2294           *bval++  = 0.0; /* for in-place factorization */
2295         }
2296       }
2297       /* shift the diagonal of the matrix */
2298       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2299 
2300       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2301       dk = rtmp[k];
2302       i = jl[k]; /* first row to be added to k_th row  */
2303 
2304       while (i < k){
2305         nexti = jl[i]; /* next row to be added to k_th row */
2306 
2307         /* compute multiplier, update diag(k) and U(i,k) */
2308         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2309         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
2310         dk += uikdi*ba[ili];
2311         ba[ili] = uikdi; /* -U(i,k) */
2312 
2313         /* add multiple of row i to k-th row */
2314         jmin = ili + 1; jmax = bi[i+1];
2315         if (jmin < jmax){
2316           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2317           /* update il and jl for row i */
2318           il[i] = jmin;
2319           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2320         }
2321         i = nexti;
2322       }
2323 
2324       /* shift the diagonals when zero pivot is detected */
2325       /* compute rs=sum of abs(off-diagonal) */
2326       rs   = 0.0;
2327       jmin = bi[k]+1;
2328       nz   = bi[k+1] - jmin;
2329       bcol = bj + jmin;
2330       for (j=0; j<nz; j++) {
2331         rs += PetscAbsScalar(rtmp[bcol[j]]);
2332       }
2333 
2334       sctx.rs = rs;
2335       sctx.pv = dk;
2336       ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr);
2337       if (sctx.newshift) break;
2338       dk = sctx.pv;
2339 
2340       /* copy data into U(k,:) */
2341       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2342       jmin = bi[k]+1; jmax = bi[k+1];
2343       if (jmin < jmax) {
2344         for (j=jmin; j<jmax; j++){
2345           col = bj[j]; ba[j] = rtmp[col];
2346         }
2347         /* add the k-th row into il and jl */
2348         il[k] = jmin;
2349         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2350       }
2351     }
2352   } while (sctx.newshift);
2353 
2354   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2355   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2356   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2357 
2358   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2359   if (perm_identity){
2360     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2361     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2362     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2363     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2364   } else {
2365     B->ops->solve           = MatSolve_SeqSBAIJ_1_inplace;
2366     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_inplace;
2367     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_inplace;
2368     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_inplace;
2369   }
2370 
2371   C->assembled    = PETSC_TRUE;
2372   C->preallocated = PETSC_TRUE;
2373   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2374   if (sctx.nshift){
2375     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2376       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2377     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2378       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2379     }
2380   }
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 /*
2385    icc() under revised new data structure.
2386    Factored arrays bj and ba are stored as
2387      U(0,:),...,U(i,:),U(n-1,:)
2388 
2389    ui=fact->i is an array of size n+1, in which
2390    ui+
2391      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2392      ui[n]:  points to U(n-1,n-1)+1
2393 
2394   udiag=fact->diag is an array of size n,in which
2395      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2396 
2397    U(i,:) contains udiag[i] as its last entry, i.e.,
2398     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2399 */
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2403 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2404 {
2405   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2406   Mat_SeqSBAIJ       *b;
2407   PetscErrorCode     ierr;
2408   PetscBool          perm_identity,missing;
2409   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2410   const PetscInt     *rip,*riip;
2411   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2412   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2413   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2414   PetscReal          fill=info->fill,levels=info->levels;
2415   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2416   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2417   PetscBT            lnkbt;
2418   IS                 iperm;
2419 
2420   PetscFunctionBegin;
2421   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2422   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2423   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2424   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2425   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2426 
2427   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2428   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2429   ui[0] = 0;
2430 
2431   /* ICC(0) without matrix ordering: simply rearrange column indices */
2432   if (!levels && perm_identity) {
2433     for (i=0; i<am; i++) {
2434       ncols    = ai[i+1] - a->diag[i];
2435       ui[i+1]  = ui[i] + ncols;
2436       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2437     }
2438     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2439     cols = uj;
2440     for (i=0; i<am; i++) {
2441       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2442       ncols = ai[i+1] - a->diag[i] -1;
2443       for (j=0; j<ncols; j++) *cols++ = aj[j];
2444       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2445     }
2446   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2447     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2448     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2449 
2450     /* initialization */
2451     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2452 
2453     /* jl: linked list for storing indices of the pivot rows
2454        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2455     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2456     for (i=0; i<am; i++){
2457       jl[i] = am; il[i] = 0;
2458     }
2459 
2460     /* create and initialize a linked list for storing column indices of the active row k */
2461     nlnk = am + 1;
2462     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2463 
2464     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2465     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2466     current_space = free_space;
2467     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2468     current_space_lvl = free_space_lvl;
2469 
2470     for (k=0; k<am; k++){  /* for each active row k */
2471       /* initialize lnk by the column indices of row rip[k] of A */
2472       nzk   = 0;
2473       ncols = ai[rip[k]+1] - ai[rip[k]];
2474       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2475       ncols_upper = 0;
2476       for (j=0; j<ncols; j++){
2477         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2478         if (riip[i] >= k){ /* only take upper triangular entry */
2479           ajtmp[ncols_upper] = i;
2480           ncols_upper++;
2481         }
2482       }
2483       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2484       nzk += nlnk;
2485 
2486       /* update lnk by computing fill-in for each pivot row to be merged in */
2487       prow = jl[k]; /* 1st pivot row */
2488 
2489       while (prow < k){
2490         nextprow = jl[prow];
2491 
2492         /* merge prow into k-th row */
2493         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2494         jmax = ui[prow+1];
2495         ncols = jmax-jmin;
2496         i     = jmin - ui[prow];
2497         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2498         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2499         j     = *(uj - 1);
2500         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2501         nzk += nlnk;
2502 
2503         /* update il and jl for prow */
2504         if (jmin < jmax){
2505           il[prow] = jmin;
2506           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2507         }
2508         prow = nextprow;
2509       }
2510 
2511       /* if free space is not available, make more free space */
2512       if (current_space->local_remaining<nzk) {
2513         i  = am - k + 1; /* num of unfactored rows */
2514         i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2515         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2516         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2517         reallocs++;
2518       }
2519 
2520       /* copy data into free_space and free_space_lvl, then initialize lnk */
2521       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2522       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2523 
2524       /* add the k-th row into il and jl */
2525       if (nzk > 1){
2526         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2527         jl[k] = jl[i]; jl[i] = k;
2528         il[k] = ui[k] + 1;
2529       }
2530       uj_ptr[k]     = current_space->array;
2531       uj_lvl_ptr[k] = current_space_lvl->array;
2532 
2533       current_space->array           += nzk;
2534       current_space->local_used      += nzk;
2535       current_space->local_remaining -= nzk;
2536 
2537       current_space_lvl->array           += nzk;
2538       current_space_lvl->local_used      += nzk;
2539       current_space_lvl->local_remaining -= nzk;
2540 
2541       ui[k+1] = ui[k] + nzk;
2542     }
2543 
2544     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2545     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2546     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2547     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2548 
2549     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2550     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2551     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2552     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2553     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2554 
2555   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2556 
2557   /* put together the new matrix in MATSEQSBAIJ format */
2558   b    = (Mat_SeqSBAIJ*)(fact)->data;
2559   b->singlemalloc = PETSC_FALSE;
2560   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2561   b->j    = uj;
2562   b->i    = ui;
2563   b->diag = udiag;
2564   b->free_diag = PETSC_TRUE;
2565   b->ilen = 0;
2566   b->imax = 0;
2567   b->row  = perm;
2568   b->col  = perm;
2569   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2570   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2571   b->icol = iperm;
2572   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2573   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2574   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2575   b->maxnz   = b->nz = ui[am];
2576   b->free_a  = PETSC_TRUE;
2577   b->free_ij = PETSC_TRUE;
2578 
2579   fact->info.factor_mallocs   = reallocs;
2580   fact->info.fill_ratio_given = fill;
2581   if (ai[am] != 0) {
2582     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2583     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2584   } else {
2585     fact->info.fill_ratio_needed = 0.0;
2586   }
2587 #if defined(PETSC_USE_INFO)
2588     if (ai[am] != 0) {
2589       PetscReal af = fact->info.fill_ratio_needed;
2590       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2591       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2592       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2593     } else {
2594       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2595     }
2596 #endif
2597   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 #undef __FUNCT__
2602 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2603 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2604 {
2605   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2606   Mat_SeqSBAIJ       *b;
2607   PetscErrorCode     ierr;
2608   PetscBool          perm_identity,missing;
2609   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2610   const PetscInt     *rip,*riip;
2611   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2612   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2613   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2614   PetscReal          fill=info->fill,levels=info->levels;
2615   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2616   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2617   PetscBT            lnkbt;
2618   IS                 iperm;
2619 
2620   PetscFunctionBegin;
2621   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2622   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2623   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2624   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2625   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2626 
2627   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2628   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2629   ui[0] = 0;
2630 
2631   /* ICC(0) without matrix ordering: simply copies fill pattern */
2632   if (!levels && perm_identity) {
2633 
2634     for (i=0; i<am; i++) {
2635       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2636       udiag[i] = ui[i];
2637     }
2638     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2639     cols = uj;
2640     for (i=0; i<am; i++) {
2641       aj    = a->j + a->diag[i];
2642       ncols = ui[i+1] - ui[i];
2643       for (j=0; j<ncols; j++) *cols++ = *aj++;
2644     }
2645   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2646     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2647     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2648 
2649     /* initialization */
2650     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2651 
2652     /* jl: linked list for storing indices of the pivot rows
2653        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2654     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2655     for (i=0; i<am; i++){
2656       jl[i] = am; il[i] = 0;
2657     }
2658 
2659     /* create and initialize a linked list for storing column indices of the active row k */
2660     nlnk = am + 1;
2661     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2662 
2663     /* initial FreeSpace size is fill*(ai[am]+1) */
2664     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2665     current_space = free_space;
2666     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2667     current_space_lvl = free_space_lvl;
2668 
2669     for (k=0; k<am; k++){  /* for each active row k */
2670       /* initialize lnk by the column indices of row rip[k] of A */
2671       nzk   = 0;
2672       ncols = ai[rip[k]+1] - ai[rip[k]];
2673       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2674       ncols_upper = 0;
2675       for (j=0; j<ncols; j++){
2676         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2677         if (riip[i] >= k){ /* only take upper triangular entry */
2678           ajtmp[ncols_upper] = i;
2679           ncols_upper++;
2680         }
2681       }
2682       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2683       nzk += nlnk;
2684 
2685       /* update lnk by computing fill-in for each pivot row to be merged in */
2686       prow = jl[k]; /* 1st pivot row */
2687 
2688       while (prow < k){
2689         nextprow = jl[prow];
2690 
2691         /* merge prow into k-th row */
2692         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2693         jmax = ui[prow+1];
2694         ncols = jmax-jmin;
2695         i     = jmin - ui[prow];
2696         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2697         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2698         j     = *(uj - 1);
2699         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2700         nzk += nlnk;
2701 
2702         /* update il and jl for prow */
2703         if (jmin < jmax){
2704           il[prow] = jmin;
2705           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2706         }
2707         prow = nextprow;
2708       }
2709 
2710       /* if free space is not available, make more free space */
2711       if (current_space->local_remaining<nzk) {
2712         i = am - k + 1; /* num of unfactored rows */
2713         i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2714         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2715         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2716         reallocs++;
2717       }
2718 
2719       /* copy data into free_space and free_space_lvl, then initialize lnk */
2720       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2721       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2722 
2723       /* add the k-th row into il and jl */
2724       if (nzk > 1){
2725         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2726         jl[k] = jl[i]; jl[i] = k;
2727         il[k] = ui[k] + 1;
2728       }
2729       uj_ptr[k]     = current_space->array;
2730       uj_lvl_ptr[k] = current_space_lvl->array;
2731 
2732       current_space->array           += nzk;
2733       current_space->local_used      += nzk;
2734       current_space->local_remaining -= nzk;
2735 
2736       current_space_lvl->array           += nzk;
2737       current_space_lvl->local_used      += nzk;
2738       current_space_lvl->local_remaining -= nzk;
2739 
2740       ui[k+1] = ui[k] + nzk;
2741     }
2742 
2743 #if defined(PETSC_USE_INFO)
2744     if (ai[am] != 0) {
2745       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2746       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2747       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2748       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2749     } else {
2750       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2751     }
2752 #endif
2753 
2754     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2755     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2756     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2757     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2758 
2759     /* destroy list of free space and other temporary array(s) */
2760     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2761     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2762     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2763     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2764 
2765   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2766 
2767   /* put together the new matrix in MATSEQSBAIJ format */
2768 
2769   b    = (Mat_SeqSBAIJ*)fact->data;
2770   b->singlemalloc = PETSC_FALSE;
2771   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2772   b->j    = uj;
2773   b->i    = ui;
2774   b->diag = udiag;
2775   b->free_diag = PETSC_TRUE;
2776   b->ilen = 0;
2777   b->imax = 0;
2778   b->row  = perm;
2779   b->col  = perm;
2780   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2781   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2782   b->icol = iperm;
2783   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2784   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2785   ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2786   b->maxnz   = b->nz = ui[am];
2787   b->free_a  = PETSC_TRUE;
2788   b->free_ij = PETSC_TRUE;
2789 
2790   fact->info.factor_mallocs    = reallocs;
2791   fact->info.fill_ratio_given  = fill;
2792   if (ai[am] != 0) {
2793     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2794   } else {
2795     fact->info.fill_ratio_needed = 0.0;
2796   }
2797   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 #undef __FUNCT__
2802 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2803 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2804 {
2805   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2806   Mat_SeqSBAIJ       *b;
2807   PetscErrorCode     ierr;
2808   PetscBool          perm_identity;
2809   PetscReal          fill = info->fill;
2810   const PetscInt     *rip,*riip;
2811   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2812   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2813   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2814   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2815   PetscBT            lnkbt;
2816   IS                 iperm;
2817 
2818   PetscFunctionBegin;
2819   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2820   /* check whether perm is the identity mapping */
2821   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2822   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2823   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2824   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2825 
2826   /* initialization */
2827   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2828   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2829   ui[0] = 0;
2830 
2831   /* jl: linked list for storing indices of the pivot rows
2832      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2833   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2834   for (i=0; i<am; i++){
2835     jl[i] = am; il[i] = 0;
2836   }
2837 
2838   /* create and initialize a linked list for storing column indices of the active row k */
2839   nlnk = am + 1;
2840   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2841 
2842   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2843   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2844   current_space = free_space;
2845 
2846   for (k=0; k<am; k++){  /* for each active row k */
2847     /* initialize lnk by the column indices of row rip[k] of A */
2848     nzk   = 0;
2849     ncols = ai[rip[k]+1] - ai[rip[k]];
2850     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2851     ncols_upper = 0;
2852     for (j=0; j<ncols; j++){
2853       i = riip[*(aj + ai[rip[k]] + j)];
2854       if (i >= k){ /* only take upper triangular entry */
2855         cols[ncols_upper] = i;
2856         ncols_upper++;
2857       }
2858     }
2859     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2860     nzk += nlnk;
2861 
2862     /* update lnk by computing fill-in for each pivot row to be merged in */
2863     prow = jl[k]; /* 1st pivot row */
2864 
2865     while (prow < k){
2866       nextprow = jl[prow];
2867       /* merge prow into k-th row */
2868       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2869       jmax = ui[prow+1];
2870       ncols = jmax-jmin;
2871       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2872       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2873       nzk += nlnk;
2874 
2875       /* update il and jl for prow */
2876       if (jmin < jmax){
2877         il[prow] = jmin;
2878         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2879       }
2880       prow = nextprow;
2881     }
2882 
2883     /* if free space is not available, make more free space */
2884     if (current_space->local_remaining<nzk) {
2885       i  = am - k + 1; /* num of unfactored rows */
2886       i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2887       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2888       reallocs++;
2889     }
2890 
2891     /* copy data into free space, then initialize lnk */
2892     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2893 
2894     /* add the k-th row into il and jl */
2895     if (nzk > 1){
2896       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2897       jl[k] = jl[i]; jl[i] = k;
2898       il[k] = ui[k] + 1;
2899     }
2900     ui_ptr[k] = current_space->array;
2901     current_space->array           += nzk;
2902     current_space->local_used      += nzk;
2903     current_space->local_remaining -= nzk;
2904 
2905     ui[k+1] = ui[k] + nzk;
2906   }
2907 
2908   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2909   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2910   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2911 
2912   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2913   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2914   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2915   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2916 
2917   /* put together the new matrix in MATSEQSBAIJ format */
2918 
2919   b = (Mat_SeqSBAIJ*)fact->data;
2920   b->singlemalloc = PETSC_FALSE;
2921   b->free_a       = PETSC_TRUE;
2922   b->free_ij      = PETSC_TRUE;
2923   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2924   b->j    = uj;
2925   b->i    = ui;
2926   b->diag = udiag;
2927   b->free_diag = PETSC_TRUE;
2928   b->ilen = 0;
2929   b->imax = 0;
2930   b->row  = perm;
2931   b->col  = perm;
2932   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2933   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2934   b->icol = iperm;
2935   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2936   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2937   ierr    = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2938   b->maxnz = b->nz = ui[am];
2939 
2940   fact->info.factor_mallocs    = reallocs;
2941   fact->info.fill_ratio_given  = fill;
2942   if (ai[am] != 0) {
2943     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2944     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2945   } else {
2946     fact->info.fill_ratio_needed = 0.0;
2947   }
2948 #if defined(PETSC_USE_INFO)
2949   if (ai[am] != 0) {
2950     PetscReal af = fact->info.fill_ratio_needed;
2951     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2952     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2953     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2954   } else {
2955      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2956   }
2957 #endif
2958   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 #undef __FUNCT__
2963 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
2964 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2965 {
2966   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2967   Mat_SeqSBAIJ       *b;
2968   PetscErrorCode     ierr;
2969   PetscBool          perm_identity;
2970   PetscReal          fill = info->fill;
2971   const PetscInt     *rip,*riip;
2972   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2973   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2974   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2975   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2976   PetscBT            lnkbt;
2977   IS                 iperm;
2978 
2979   PetscFunctionBegin;
2980   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2981   /* check whether perm is the identity mapping */
2982   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2983   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2984   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2985   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2986 
2987   /* initialization */
2988   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2989   ui[0] = 0;
2990 
2991   /* jl: linked list for storing indices of the pivot rows
2992      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2993   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2994   for (i=0; i<am; i++){
2995     jl[i] = am; il[i] = 0;
2996   }
2997 
2998   /* create and initialize a linked list for storing column indices of the active row k */
2999   nlnk = am + 1;
3000   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3001 
3002   /* initial FreeSpace size is fill*(ai[am]+1) */
3003   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3004   current_space = free_space;
3005 
3006   for (k=0; k<am; k++){  /* for each active row k */
3007     /* initialize lnk by the column indices of row rip[k] of A */
3008     nzk   = 0;
3009     ncols = ai[rip[k]+1] - ai[rip[k]];
3010     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3011     ncols_upper = 0;
3012     for (j=0; j<ncols; j++){
3013       i = riip[*(aj + ai[rip[k]] + j)];
3014       if (i >= k){ /* only take upper triangular entry */
3015         cols[ncols_upper] = i;
3016         ncols_upper++;
3017       }
3018     }
3019     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3020     nzk += nlnk;
3021 
3022     /* update lnk by computing fill-in for each pivot row to be merged in */
3023     prow = jl[k]; /* 1st pivot row */
3024 
3025     while (prow < k){
3026       nextprow = jl[prow];
3027       /* merge prow into k-th row */
3028       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
3029       jmax = ui[prow+1];
3030       ncols = jmax-jmin;
3031       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3032       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3033       nzk += nlnk;
3034 
3035       /* update il and jl for prow */
3036       if (jmin < jmax){
3037         il[prow] = jmin;
3038         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3039       }
3040       prow = nextprow;
3041     }
3042 
3043     /* if free space is not available, make more free space */
3044     if (current_space->local_remaining<nzk) {
3045       i = am - k + 1; /* num of unfactored rows */
3046       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3047       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3048       reallocs++;
3049     }
3050 
3051     /* copy data into free space, then initialize lnk */
3052     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3053 
3054     /* add the k-th row into il and jl */
3055     if (nzk-1 > 0){
3056       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3057       jl[k] = jl[i]; jl[i] = k;
3058       il[k] = ui[k] + 1;
3059     }
3060     ui_ptr[k] = current_space->array;
3061     current_space->array           += nzk;
3062     current_space->local_used      += nzk;
3063     current_space->local_remaining -= nzk;
3064 
3065     ui[k+1] = ui[k] + nzk;
3066   }
3067 
3068 #if defined(PETSC_USE_INFO)
3069   if (ai[am] != 0) {
3070     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3071     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3072     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3073     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3074   } else {
3075      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3076   }
3077 #endif
3078 
3079   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3080   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3081   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3082 
3083   /* destroy list of free space and other temporary array(s) */
3084   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
3085   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3086   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3087 
3088   /* put together the new matrix in MATSEQSBAIJ format */
3089 
3090   b = (Mat_SeqSBAIJ*)fact->data;
3091   b->singlemalloc = PETSC_FALSE;
3092   b->free_a       = PETSC_TRUE;
3093   b->free_ij      = PETSC_TRUE;
3094   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
3095   b->j    = uj;
3096   b->i    = ui;
3097   b->diag = 0;
3098   b->ilen = 0;
3099   b->imax = 0;
3100   b->row  = perm;
3101   b->col  = perm;
3102   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3103   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3104   b->icol = iperm;
3105   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3106   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3107   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3108   b->maxnz = b->nz = ui[am];
3109 
3110   fact->info.factor_mallocs    = reallocs;
3111   fact->info.fill_ratio_given  = fill;
3112   if (ai[am] != 0) {
3113     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3114   } else {
3115     fact->info.fill_ratio_needed = 0.0;
3116   }
3117   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3118   PetscFunctionReturn(0);
3119 }
3120 
3121 #undef __FUNCT__
3122 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3123 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3124 {
3125   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3126   PetscErrorCode    ierr;
3127   PetscInt          n = A->rmap->n;
3128   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3129   PetscScalar       *x,sum;
3130   const PetscScalar *b;
3131   const MatScalar   *aa = a->a,*v;
3132   PetscInt          i,nz;
3133 
3134   PetscFunctionBegin;
3135   if (!n) PetscFunctionReturn(0);
3136 
3137   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3138   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3139 
3140   /* forward solve the lower triangular */
3141   x[0] = b[0];
3142   v    = aa;
3143   vi   = aj;
3144   for (i=1; i<n; i++) {
3145     nz  = ai[i+1] - ai[i];
3146     sum = b[i];
3147     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3148     v  += nz;
3149     vi += nz;
3150     x[i] = sum;
3151   }
3152 
3153   /* backward solve the upper triangular */
3154   for (i=n-1; i>=0; i--){
3155     v   = aa + adiag[i+1] + 1;
3156     vi  = aj + adiag[i+1] + 1;
3157     nz = adiag[i] - adiag[i+1]-1;
3158     sum = x[i];
3159     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3160     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3161   }
3162 
3163   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3164   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3165   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3166   PetscFunctionReturn(0);
3167 }
3168 
3169 #undef __FUNCT__
3170 #define __FUNCT__ "MatSolve_SeqAIJ"
3171 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3172 {
3173   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3174   IS                iscol = a->col,isrow = a->row;
3175   PetscErrorCode    ierr;
3176   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3177   const PetscInt    *rout,*cout,*r,*c;
3178   PetscScalar       *x,*tmp,sum;
3179   const PetscScalar *b;
3180   const MatScalar   *aa = a->a,*v;
3181 
3182   PetscFunctionBegin;
3183   if (!n) PetscFunctionReturn(0);
3184 
3185   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3186   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3187   tmp  = a->solve_work;
3188 
3189   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3190   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3191 
3192   /* forward solve the lower triangular */
3193   tmp[0] = b[r[0]];
3194   v      = aa;
3195   vi     = aj;
3196   for (i=1; i<n; i++) {
3197     nz  = ai[i+1] - ai[i];
3198     sum = b[r[i]];
3199     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3200     tmp[i] = sum;
3201     v += nz; vi += nz;
3202   }
3203 
3204   /* backward solve the upper triangular */
3205   for (i=n-1; i>=0; i--){
3206     v   = aa + adiag[i+1]+1;
3207     vi  = aj + adiag[i+1]+1;
3208     nz  = adiag[i]-adiag[i+1]-1;
3209     sum = tmp[i];
3210     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3211     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3212   }
3213 
3214   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3215   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3216   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3217   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3218   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3219   PetscFunctionReturn(0);
3220 }
3221 
3222 #undef __FUNCT__
3223 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3224 /*
3225     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3226 */
3227 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3228 {
3229   Mat                B = *fact;
3230   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
3231   IS                 isicol;
3232   PetscErrorCode     ierr;
3233   const PetscInt     *r,*ic;
3234   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3235   PetscInt           *bi,*bj,*bdiag,*bdiag_rev;
3236   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3237   PetscInt           nlnk,*lnk;
3238   PetscBT            lnkbt;
3239   PetscBool          row_identity,icol_identity;
3240   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3241   const PetscInt     *ics;
3242   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3243   PetscReal          dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount;
3244   PetscInt           dtcount=(PetscInt)info->dtcount,nnz_max;
3245   PetscBool          missing;
3246 
3247   PetscFunctionBegin;
3248 
3249   if (dt      == PETSC_DEFAULT) dt      = 0.005;
3250   if (dtcol   == PETSC_DEFAULT) dtcol   = 0.01; /* XXX unused! */
3251   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3252 
3253   /* ------- symbolic factorization, can be reused ---------*/
3254   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3255   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3256   adiag=a->diag;
3257 
3258   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3259 
3260   /* bdiag is location of diagonal in factor */
3261   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3262   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */
3263 
3264   /* allocate row pointers bi */
3265   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3266 
3267   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3268   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3269   nnz_max  = ai[n]+2*n*dtcount+2;
3270 
3271   ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3272   ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3273 
3274   /* put together the new matrix */
3275   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
3276   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
3277   b    = (Mat_SeqAIJ*)B->data;
3278   b->free_a       = PETSC_TRUE;
3279   b->free_ij      = PETSC_TRUE;
3280   b->singlemalloc = PETSC_FALSE;
3281   b->a          = ba;
3282   b->j          = bj;
3283   b->i          = bi;
3284   b->diag       = bdiag;
3285   b->ilen       = 0;
3286   b->imax       = 0;
3287   b->row        = isrow;
3288   b->col        = iscol;
3289   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3290   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3291   b->icol       = isicol;
3292   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3293 
3294   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3295   b->maxnz = nnz_max;
3296 
3297   B->factortype            = MAT_FACTOR_ILUDT;
3298   B->info.factor_mallocs   = 0;
3299   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3300   CHKMEMQ;
3301   /* ------- end of symbolic factorization ---------*/
3302 
3303   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3304   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3305   ics  = ic;
3306 
3307   /* linked list for storing column indices of the active row */
3308   nlnk = n + 1;
3309   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3310 
3311   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3312   ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr);
3313   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3314   ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr);
3315   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3316 
3317   bi[0]    = 0;
3318   bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3319   bdiag_rev[n] = bdiag[0];
3320   bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3321   for (i=0; i<n; i++) {
3322     /* copy initial fill into linked list */
3323     nzi = 0; /* nonzeros for active row i */
3324     nzi = ai[r[i]+1] - ai[r[i]];
3325     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3326     nzi_al = adiag[r[i]] - ai[r[i]];
3327     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3328     ajtmp = aj + ai[r[i]];
3329     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3330 
3331     /* load in initial (unfactored row) */
3332     aatmp = a->a + ai[r[i]];
3333     for (j=0; j<nzi; j++) {
3334       rtmp[ics[*ajtmp++]] = *aatmp++;
3335     }
3336 
3337     /* add pivot rows into linked list */
3338     row = lnk[n];
3339     while (row < i ) {
3340       nzi_bl = bi[row+1] - bi[row] + 1;
3341       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3342       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3343       nzi  += nlnk;
3344       row   = lnk[row];
3345     }
3346 
3347     /* copy data from lnk into jtmp, then initialize lnk */
3348     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3349 
3350     /* numerical factorization */
3351     bjtmp = jtmp;
3352     row   = *bjtmp++; /* 1st pivot row */
3353     while  ( row < i ) {
3354       pc         = rtmp + row;
3355       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3356       multiplier = (*pc) * (*pv);
3357       *pc        = multiplier;
3358       if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3359         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3360         pv         = ba + bdiag[row+1] + 1;
3361         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3362         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3363         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3364         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3365       }
3366       row = *bjtmp++;
3367     }
3368 
3369     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3370     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3371     nzi_bl = 0; j = 0;
3372     while (jtmp[j] < i){ /* Note: jtmp is sorted */
3373       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3374       nzi_bl++; j++;
3375     }
3376     nzi_bu = nzi - nzi_bl -1;
3377     while (j < nzi){
3378       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3379       j++;
3380     }
3381 
3382     bjtmp = bj + bi[i];
3383     batmp = ba + bi[i];
3384     /* apply level dropping rule to L part */
3385     ncut = nzi_al + dtcount;
3386     if (ncut < nzi_bl){
3387       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3388       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3389     } else {
3390       ncut = nzi_bl;
3391     }
3392     for (j=0; j<ncut; j++){
3393       bjtmp[j] = jtmp[j];
3394       batmp[j] = vtmp[j];
3395       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3396     }
3397     bi[i+1] = bi[i] + ncut;
3398     nzi = ncut + 1;
3399 
3400     /* apply level dropping rule to U part */
3401     ncut = nzi_au + dtcount;
3402     if (ncut < nzi_bu){
3403       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3404       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3405     } else {
3406       ncut = nzi_bu;
3407     }
3408     nzi += ncut;
3409 
3410     /* mark bdiagonal */
3411     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3412     bdiag_rev[n-i-1] = bdiag[i+1];
3413     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3414     bjtmp = bj + bdiag[i];
3415     batmp = ba + bdiag[i];
3416     *bjtmp = i;
3417     *batmp = diag_tmp; /* rtmp[i]; */
3418     if (*batmp == 0.0) {
3419       *batmp = dt+shift;
3420       /* printf(" row %d add shift %g\n",i,shift); */
3421     }
3422     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3423     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3424 
3425     bjtmp = bj + bdiag[i+1]+1;
3426     batmp = ba + bdiag[i+1]+1;
3427     for (k=0; k<ncut; k++){
3428       bjtmp[k] = jtmp[nzi_bl+1+k];
3429       batmp[k] = vtmp[nzi_bl+1+k];
3430       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3431     }
3432     /* printf("\n"); */
3433 
3434     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
3435     /*
3436     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3437     printf(" ----------------------------\n");
3438     */
3439   } /* for (i=0; i<n; i++) */
3440   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3441   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3442 
3443   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3444   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3445 
3446   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3447   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3448   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3449   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3450 
3451   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3452   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3453 
3454   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3455   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3456   if (row_identity && icol_identity) {
3457     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3458   } else {
3459     B->ops->solve = MatSolve_SeqAIJ;
3460   }
3461 
3462   B->ops->solveadd          = 0;
3463   B->ops->solvetranspose    = 0;
3464   B->ops->solvetransposeadd = 0;
3465   B->ops->matsolve          = 0;
3466   B->assembled              = PETSC_TRUE;
3467   B->preallocated           = PETSC_TRUE;
3468   PetscFunctionReturn(0);
3469 }
3470 
3471 /* a wraper of MatILUDTFactor_SeqAIJ() */
3472 #undef __FUNCT__
3473 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3474 /*
3475     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3476 */
3477 
3478 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3479 {
3480   PetscErrorCode     ierr;
3481 
3482   PetscFunctionBegin;
3483   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3484   PetscFunctionReturn(0);
3485 }
3486 
3487 /*
3488    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3489    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3490 */
3491 #undef __FUNCT__
3492 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3493 /*
3494     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3495 */
3496 
3497 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3498 {
3499   Mat            C=fact;
3500   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3501   IS             isrow = b->row,isicol = b->icol;
3502   PetscErrorCode ierr;
3503   const PetscInt *r,*ic,*ics;
3504   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3505   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3506   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3507   PetscReal      dt=info->dt,shift=info->shiftamount;
3508   PetscBool      row_identity, col_identity;
3509 
3510   PetscFunctionBegin;
3511   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3512   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3513   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3514   ics  = ic;
3515 
3516   for (i=0; i<n; i++){
3517     /* initialize rtmp array */
3518     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3519     bjtmp = bj + bi[i];
3520     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3521     rtmp[i] = 0.0;
3522     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3523     bjtmp = bj + bdiag[i+1] + 1;
3524     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3525 
3526     /* load in initial unfactored row of A */
3527     /* printf("row %d\n",i); */
3528     nz    = ai[r[i]+1] - ai[r[i]];
3529     ajtmp = aj + ai[r[i]];
3530     v     = aa + ai[r[i]];
3531     for (j=0; j<nz; j++) {
3532       rtmp[ics[*ajtmp++]] = v[j];
3533       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3534     }
3535     /* printf("\n"); */
3536 
3537     /* numerical factorization */
3538     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3539     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3540     k = 0;
3541     while (k < nzl){
3542       row   = *bjtmp++;
3543       /* printf("  prow %d\n",row); */
3544       pc         = rtmp + row;
3545       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3546       multiplier = (*pc) * (*pv);
3547       *pc        = multiplier;
3548       if (PetscAbsScalar(multiplier) > dt){
3549         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3550         pv         = b->a + bdiag[row+1] + 1;
3551         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3552         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3553         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3554       }
3555       k++;
3556     }
3557 
3558     /* finished row so stick it into b->a */
3559     /* L-part */
3560     pv = b->a + bi[i] ;
3561     pj = bj + bi[i] ;
3562     nzl = bi[i+1] - bi[i];
3563     for (j=0; j<nzl; j++) {
3564       pv[j] = rtmp[pj[j]];
3565       /* printf(" (%d,%g),",pj[j],pv[j]); */
3566     }
3567 
3568     /* diagonal: invert diagonal entries for simplier triangular solves */
3569     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3570     b->a[bdiag[i]] = 1.0/rtmp[i];
3571     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3572 
3573     /* U-part */
3574     pv = b->a + bdiag[i+1] + 1;
3575     pj = bj + bdiag[i+1] + 1;
3576     nzu = bdiag[i] - bdiag[i+1] - 1;
3577     for (j=0; j<nzu; j++) {
3578       pv[j] = rtmp[pj[j]];
3579       /* printf(" (%d,%g),",pj[j],pv[j]); */
3580     }
3581     /* printf("\n"); */
3582   }
3583 
3584   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3585   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3586   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3587 
3588   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3589   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3590   if (row_identity && col_identity) {
3591     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
3592   } else {
3593     C->ops->solve   = MatSolve_SeqAIJ;
3594   }
3595   C->ops->solveadd           = 0;
3596   C->ops->solvetranspose     = 0;
3597   C->ops->solvetransposeadd  = 0;
3598   C->ops->matsolve           = 0;
3599   C->assembled    = PETSC_TRUE;
3600   C->preallocated = PETSC_TRUE;
3601   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3602   PetscFunctionReturn(0);
3603 }
3604